Commit 3c7310f8cc

Jakub Konka <kubkon@jakubkonka.com>
2021-10-31 23:08:53
stage2 x86_64: add MIR->Isel lowering step for x86_64
* incorporate Andrew's MIR draft as Mir.zig
* add skeleton for Emit.zig module - Emit will lower MIR into machine code or textual ASM
* implement push
* implement ret
* implement mov r/m, r
* implement sub r/m, imm and sub r/m, r
* put ops with a common encoding together - some ops such as MOV and CMP share an implementation, so group them and vary only the actual opcode via the modRM extension
* implement pop
* implement movabs - movabs is a special case of mov that is not handled by the general mov MIR instruction because it has to handle 64-bit immediates
* store imm64 as a struct `Imm64{ msb: u32, lsb: u32 }` in extra data, for use with e.g. the movabs inst (see the sketch after this message)
* implement more mov variations
* implement adc
* implement add
* implement sub
* implement xor
* implement and
* implement or
* implement sbb
* implement cmp
* implement lea - lea doesn't follow the same scheme as the other insts above; similarly, I think bit shifts and rotates should be put in a separate basket too
* implement adc_scale_src
* implement add_scale_src
* implement sub_scale_src
* implement xor_scale_src
* implement and_scale_src
* implement or_scale_src
* implement sbb_scale_src
* implement cmp_scale_src
* implement adc_scale_dst
* implement add_scale_dst
* implement sub_scale_dst
* implement xor_scale_dst
* implement and_scale_dst
* implement or_scale_dst
* implement sbb_scale_dst
* implement cmp_scale_dst
* implement mov_scale_src
* implement mov_scale_dst
* implement adc_scale_imm
* implement add_scale_imm
* implement sub_scale_imm
* implement xor_scale_imm
* implement and_scale_imm
* implement or_scale_imm
* implement sbb_scale_imm
* implement cmp_scale_imm
* port bin math to MIR
* backpatch stack size into a previously emitted MIR inst
* implement Function.gen() (minus dbg info)
* implement jmp/call [imm] - we can now call functions using indirect absolute addressing, or via registers
* port airRet to use MIR
* port airLoop to use MIR
* patch up performReloc to use inst indices
* implement conditional jumps (without relocs)
* implement set byte on condition
* implement basic lea r64, [rip + imm]
* implement calling externs
* implement callq in PIE
* implement lea RIP in PIE context
* remove all refs to Encoder from CodeGen
* implement basic imul ops
* pass all Linux tests!
* enable most of dbg info gen
* generate arg dbg info in Emit
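The call sites in the diff below build `Mir.Inst` values of the shape `{ .tag, .ops, .data }` and pack operands through `(Mir.Ops{ .reg1, .reg2, .flags }).encode()`, but Mir.zig itself is not included in this excerpt. Here is a minimal sketch of the layout those call sites imply; the packed 16-bit `Ops` is an inference from `Register` shrinking to `enum(u7)` in bits.zig below (7 + 7 + 2 = 16 bits), and `Imm64` comes straight from the bullet above:

    // Sketch only -- the real Mir.zig is not shown in this excerpt.
    pub const Inst = struct {
        tag: Tag,
        ops: u16, // packed operands, see Ops.encode() below
        data: Data,

        pub const Index = u32;

        pub const Tag = enum {
            // abridged; the commit message lists the full set
            push, pop, mov, sub, add, ret, jmp, call, dbg_line,
        };

        pub const Data = union {
            imm: i32, // e.g. the imm32 of `sub rsp, imm32`
            inst: Index, // a jump target, as an MIR instruction index
            payload: u32, // index into the `extra` array
            extern_fn: u32, // string-table index of an extern symbol
        };
    };

    pub const Ops = packed struct {
        reg1: Register = .none, // the `none` pseudo-register added in bits.zig
        reg2: Register = .none,
        flags: u2 = 0b00, // meaning varies per tag (addressing form, condition, ...)

        pub fn encode(ops: Ops) u16 {
            return @bitCast(u16, ops);
        }
    };

    // 64-bit immediates do not fit in Data, so they are split across two
    // u32 slots of the extra array (used by e.g. movabs):
    pub const Imm64 = struct {
        msb: u32,
        lsb: u32,
    };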
1 parent 4acd892
Changed files (5)
src/arch/x86_64/bits.zig
@@ -22,7 +22,7 @@ const DW = std.dwarf;
 ///
 /// The ID can be easily determined by figuring out what range the register is
 /// in, and then subtracting the base.
-pub const Register = enum(u8) {
+pub const Register = enum(u7) {
     // 0 through 15, 64-bit registers. 8-15 are extended.
     // id is just the int value.
     rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
@@ -43,6 +43,10 @@ pub const Register = enum(u8) {
     al, cl, dl, bl, ah, ch, dh, bh,
     r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b,
 
+    // Pseudo, used only for MIR to signify that the
+    // operand is not a register but an immediate, etc.
+    none,
+
     /// Returns the bit-width of the register.
     pub fn size(self: Register) u7 {
         return switch (@enumToInt(self)) {
@@ -73,7 +77,7 @@ pub const Register = enum(u8) {
     }
 
     /// Like id, but only returns the lower 3 bits.
-    pub fn low_id(self: Register) u3 {
+    pub fn lowId(self: Register) u3 {
         return @truncate(u3, @enumToInt(self));
     }
 
@@ -577,8 +581,8 @@ test "x86_64 Encoder helpers" {
         });
         encoder.opcode_2byte(0x0f, 0xaf);
         encoder.modRm_direct(
-            Register.eax.low_id(),
-            Register.edi.low_id(),
+            Register.eax.lowId(),
+            Register.edi.lowId(),
         );
 
         try testing.expectEqualSlices(u8, &[_]u8{ 0x0f, 0xaf, 0xc7 }, code.items);
@@ -597,8 +601,8 @@ test "x86_64 Encoder helpers" {
         });
         encoder.opcode_1byte(0x89);
         encoder.modRm_direct(
-            Register.edi.low_id(),
-            Register.eax.low_id(),
+            Register.edi.lowId(),
+            Register.eax.lowId(),
         );
 
         try testing.expectEqualSlices(u8, &[_]u8{ 0x89, 0xf8 }, code.items);
@@ -624,7 +628,7 @@ test "x86_64 Encoder helpers" {
         encoder.opcode_1byte(0x81);
         encoder.modRm_direct(
             0,
-            Register.rcx.low_id(),
+            Register.rcx.lowId(),
         );
         encoder.imm32(2147483647);
 
src/arch/x86_64/CodeGen.zig
@@ -14,11 +14,12 @@ const Allocator = mem.Allocator;
 const Compilation = @import("../../Compilation.zig");
 const DebugInfoOutput = @import("../../codegen.zig").DebugInfoOutput;
 const DW = std.dwarf;
-const Encoder = @import("bits.zig").Encoder;
+const Emit = @import("Emit.zig");
 const ErrorMsg = Module.ErrorMsg;
 const FnResult = @import("../../codegen.zig").FnResult;
 const GenerateSymbolError = @import("../../codegen.zig").GenerateSymbolError;
 const Liveness = @import("../../Liveness.zig");
+const Mir = @import("Mir.zig");
 const Module = @import("../../Module.zig");
 const RegisterManager = @import("../../register_manager.zig").RegisterManager;
 const Target = std.Target;
@@ -32,15 +33,12 @@ const InnerError = error{
     CodegenFail,
 };
 
-arch: std.Target.Cpu.Arch,
 gpa: *Allocator,
 air: Air,
 liveness: Liveness,
 bin_file: *link.File,
 target: *const std.Target,
 mod_fn: *const Module.Fn,
-code: *std.ArrayList(u8),
-debug_output: DebugInfoOutput,
 err_msg: ?*ErrorMsg,
 args: []MCValue,
 ret_mcv: MCValue,
@@ -49,18 +47,19 @@ arg_index: usize,
 src_loc: Module.SrcLoc,
 stack_align: u32,
 
-prev_di_line: u32,
-prev_di_column: u32,
+/// MIR Instructions
+mir_instructions: std.MultiArrayList(Mir.Inst) = .{},
+/// MIR extra data
+mir_extra: std.ArrayListUnmanaged(u32) = .{},
+
 /// Byte offset within the source file of the ending curly.
 end_di_line: u32,
 end_di_column: u32,
-/// Relative to the beginning of `code`.
-prev_di_pc: usize,
 
 /// The value is an offset into the `Function` `code` from the beginning.
 /// To perform the reloc, write 32-bit signed little-endian integer
 /// which is a relative jump, based on the address following the reloc.
-exitlude_jump_relocs: std.ArrayListUnmanaged(usize) = .{},
+exitlude_jump_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{},
 
 /// Whenever there is a runtime branch, we push a Branch onto this stack,
 /// and pop it off when the runtime branch joins. This provides an "overlay"
@@ -89,7 +88,7 @@ air_bookkeeping: @TypeOf(air_bookkeeping_init) = air_bookkeeping_init,
 
 const air_bookkeeping_init = if (std.debug.runtime_safety) @as(usize, 0) else {};
 
-const MCValue = union(enum) {
+pub const MCValue = union(enum) {
     /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc.
     /// TODO Look into deleting this tag and using `dead` instead, since every use
     /// of MCValue.none should be instead looking at the type and noticing it is 0 bits.
@@ -178,7 +177,7 @@ const StackAllocation = struct {
 };
 
 const BlockData = struct {
-    relocs: std.ArrayListUnmanaged(Reloc),
+    relocs: std.ArrayListUnmanaged(Mir.Inst.Index),
     /// The first break instruction encounters `null` here and chooses a
     /// machine code value for the block result, populating this field.
     /// Following break instructions encounter that value and use it for
@@ -186,18 +185,6 @@ const BlockData = struct {
     mcv: MCValue,
 };
 
-const Reloc = union(enum) {
-    /// The value is an offset into the `Function` `code` from the beginning.
-    /// To perform the reloc, write 32-bit signed little-endian integer
-    /// which is a relative jump, based on the address following the reloc.
-    rel32: usize,
-    /// A branch in the ARM instruction set
-    arm_branch: struct {
-        pos: usize,
-        cond: @import("../../arch/arm/bits.zig").Condition,
-    },
-};
-
 const BigTomb = struct {
     function: *Self,
     inst: Air.Inst.Index,
@@ -238,7 +225,6 @@ const BigTomb = struct {
 const Self = @This();
 
 pub fn generate(
-    arch: std.Target.Cpu.Arch,
     bin_file: *link.File,
     src_loc: Module.SrcLoc,
     module_fn: *Module.Fn,
@@ -247,7 +233,7 @@ pub fn generate(
     code: *std.ArrayList(u8),
     debug_output: DebugInfoOutput,
 ) GenerateSymbolError!FnResult {
-    if (build_options.skip_non_native and builtin.cpu.arch != arch) {
+    if (build_options.skip_non_native and builtin.cpu.arch != bin_file.options.target.cpu.arch) {
         @panic("Attempted to compile for architecture that was disabled by build configuration");
     }
 
@@ -263,15 +249,12 @@ pub fn generate(
     try branch_stack.append(.{});
 
     var function = Self{
-        .arch = arch,
         .gpa = bin_file.allocator,
         .air = air,
         .liveness = liveness,
         .target = &bin_file.options.target,
         .bin_file = bin_file,
         .mod_fn = module_fn,
-        .code = code,
-        .debug_output = debug_output,
         .err_msg = null,
         .args = undefined, // populated after `resolveCallingConventionValues`
         .ret_mcv = undefined, // populated after `resolveCallingConventionValues`
@@ -280,15 +263,14 @@ pub fn generate(
         .branch_stack = &branch_stack,
         .src_loc = src_loc,
         .stack_align = undefined,
-        .prev_di_pc = 0,
-        .prev_di_line = module_fn.lbrace_line,
-        .prev_di_column = module_fn.lbrace_column,
         .end_di_line = module_fn.rbrace_line,
         .end_di_column = module_fn.rbrace_column,
     };
     defer function.stack.deinit(bin_file.allocator);
     defer function.blocks.deinit(bin_file.allocator);
     defer function.exitlude_jump_relocs.deinit(bin_file.allocator);
+    defer function.mir_instructions.deinit(bin_file.allocator);
+    defer function.mir_extra.deinit(bin_file.allocator);
 
     var call_info = function.resolveCallingConventionValues(fn_type) catch |err| switch (err) {
         error.CodegenFail => return FnResult{ .fail = function.err_msg.? },
@@ -306,6 +288,30 @@ pub fn generate(
         else => |e| return e,
     };
 
+    var mir = Mir{
+        .function = &function,
+        .instructions = function.mir_instructions.toOwnedSlice(),
+        .extra = function.mir_extra.toOwnedSlice(bin_file.allocator),
+    };
+    defer mir.deinit(bin_file.allocator);
+
+    var emit = Emit{
+        .mir = mir,
+        .bin_file = bin_file,
+        .debug_output = debug_output,
+        .target = &bin_file.options.target,
+        .src_loc = src_loc,
+        .code = code,
+        .prev_di_pc = 0,
+        .prev_di_line = module_fn.lbrace_line,
+        .prev_di_column = module_fn.lbrace_column,
+    };
+    defer emit.deinit();
+    emit.emitMir() catch |err| switch (err) {
+        error.EmitFail => return FnResult{ .fail = emit.err_msg.? },
+        else => |e| return e,
+    };
+
     if (function.err_msg) |em| {
         return FnResult{ .fail = em };
     } else {
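Emit.zig is likewise absent from this excerpt. Judging from the fields handed to it above - it takes over `code`, `debug_output`, and the `prev_di_*` line-table cursor that used to live on the codegen struct - its driver presumably walks the MIR one tag at a time, roughly like this sketch (helper names are hypothetical; `emit.fail` is suggested by the `error.EmitFail`/`emit.err_msg` handling above):

    // Hypothetical outline of Emit.emitMir; only the struct literal above
    // is actually shown in this commit excerpt.
    pub fn emitMir(emit: *Emit) !void {
        const mir_tags = emit.mir.instructions.items(.tag);
        for (mir_tags) |tag, index| {
            const inst = @intCast(u32, index);
            switch (tag) {
                .push, .pop => try emit.mirPushPop(tag, inst),
                .dbg_line => try emit.mirDbgLine(inst), // the old dbgAdvancePCAndLine logic
                // ... one arm per Mir.Inst.Tag ...
                else => return emit.fail("TODO lower MIR instruction {s}", .{@tagName(tag)}),
            }
        }
    }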
@@ -313,71 +319,143 @@ pub fn generate(
     }
 }
 
-fn gen(self: *Self) !void {
-    try self.code.ensureUnusedCapacity(11);
+fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index {
+    const gpa = self.gpa;
+    try self.mir_instructions.ensureUnusedCapacity(gpa, 1);
+    const result_index = @intCast(Mir.Inst.Index, self.mir_instructions.len);
+    self.mir_instructions.appendAssumeCapacity(inst);
+    return result_index;
+}
+
+pub fn addExtra(self: *Self, extra: anytype) Allocator.Error!u32 {
+    const fields = std.meta.fields(@TypeOf(extra));
+    try self.mir_extra.ensureUnusedCapacity(self.gpa, fields.len);
+    return self.addExtraAssumeCapacity(extra);
+}
 
+pub fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 {
+    const fields = std.meta.fields(@TypeOf(extra));
+    const result = @intCast(u32, self.mir_extra.items.len);
+    inline for (fields) |field| {
+        self.mir_extra.appendAssumeCapacity(switch (field.field_type) {
+            u32 => @field(extra, field.name),
+            i32 => @bitCast(u32, @field(extra, field.name)),
+            else => @compileError("bad field type"),
+        });
+    }
+    return result;
+}
+
+fn gen(self: *Self) InnerError!void {
     const cc = self.fn_type.fnCallingConvention();
     if (cc != .Naked) {
+        _ = try self.addInst(.{
+            .tag = .push,
+            .ops = (Mir.Ops{
+                .reg1 = .rbp,
+            }).encode(),
+            .data = undefined, // unused for push reg
+        });
+        _ = try self.addInst(.{
+            .tag = .mov,
+            .ops = (Mir.Ops{
+                .reg1 = .rsp,
+                .reg2 = .rbp,
+            }).encode(),
+            .data = undefined,
+        });
         // We want to subtract the aligned stack frame size from rsp here, but we don't
         // yet know how big it will be, so we leave room for a 4-byte stack size.
         // TODO During semantic analysis, check if there are no function calls. If there
         // are none, here we can omit the part where we subtract and then add rsp.
-        self.code.appendSliceAssumeCapacity(&[_]u8{
-            0x55, // push rbp
-            0x48, 0x89, 0xe5, // mov rbp, rsp
-            0x48, 0x81, 0xec, // sub rsp, imm32 (with reloc)
+        const backpatch_reloc = try self.addInst(.{
+            .tag = .sub,
+            .ops = (Mir.Ops{
+                .reg1 = .rsp,
+            }).encode(),
+            .data = .{ .imm = 0 },
+        });
+
+        _ = try self.addInst(.{
+            .tag = .dbg_prologue_end,
+            .ops = undefined,
+            .data = undefined,
         });
-        const reloc_index = self.code.items.len;
-        self.code.items.len += 4;
 
-        try self.dbgSetPrologueEnd();
         try self.genBody(self.air.getMainBody());
 
         const stack_end = self.max_end_stack;
-        if (stack_end > math.maxInt(i32))
+        if (stack_end > math.maxInt(i32)) {
             return self.failSymbol("too much stack used in call parameters", .{});
+        }
         const aligned_stack_end = mem.alignForward(stack_end, self.stack_align);
-        mem.writeIntLittle(u32, self.code.items[reloc_index..][0..4], @intCast(u32, aligned_stack_end));
-
-        if (self.code.items.len >= math.maxInt(i32)) {
-            return self.failSymbol("unable to perform relocation: jump too far", .{});
+        if (aligned_stack_end > 0) {
+            self.mir_instructions.items(.data)[backpatch_reloc].imm = @intCast(i32, aligned_stack_end);
         }
+
         if (self.exitlude_jump_relocs.items.len == 1) {
-            self.code.items.len -= 5;
+            self.mir_instructions.len -= 1;
         } else for (self.exitlude_jump_relocs.items) |jmp_reloc| {
-            const amt = self.code.items.len - (jmp_reloc + 4);
-            const s32_amt = @intCast(i32, amt);
-            mem.writeIntLittle(i32, self.code.items[jmp_reloc..][0..4], s32_amt);
+            self.mir_instructions.items(.data)[jmp_reloc].inst = @intCast(u32, self.mir_instructions.len);
         }
 
-        // Important to be after the possible self.code.items.len -= 5 above.
-        try self.dbgSetEpilogueBegin();
-
-        try self.code.ensureUnusedCapacity(9);
-        // add rsp, x
-        if (aligned_stack_end > math.maxInt(i8)) {
-            // example: 48 81 c4 ff ff ff 7f  add    rsp,0x7fffffff
-            self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x81, 0xc4 });
-            const x = @intCast(u32, aligned_stack_end);
-            mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x);
-        } else if (aligned_stack_end != 0) {
-            // example: 48 83 c4 7f           add    rsp,0x7f
-            const x = @intCast(u8, aligned_stack_end);
-            self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x83, 0xc4, x });
+        _ = try self.addInst(.{
+            .tag = .dbg_epilogue_begin,
+            .ops = undefined,
+            .data = undefined,
+        });
+
+        if (aligned_stack_end > 0) {
+            // add rsp, x
+            _ = try self.addInst(.{
+                .tag = .add,
+                .ops = (Mir.Ops{
+                    .reg1 = .rsp,
+                }).encode(),
+                .data = .{ .imm = @intCast(i32, aligned_stack_end) },
+            });
         }
 
-        self.code.appendSliceAssumeCapacity(&[_]u8{
-            0x5d, // pop rbp
-            0xc3, // ret
+        _ = try self.addInst(.{
+            .tag = .pop,
+            .ops = (Mir.Ops{
+                .reg1 = .rbp,
+            }).encode(),
+            .data = undefined,
+        });
+        _ = try self.addInst(.{
+            .tag = .ret,
+            .ops = (Mir.Ops{
+                .flags = 0b11,
+            }).encode(),
+            .data = undefined,
         });
     } else {
-        try self.dbgSetPrologueEnd();
+        _ = try self.addInst(.{
+            .tag = .dbg_prologue_end,
+            .ops = undefined,
+            .data = undefined,
+        });
+
         try self.genBody(self.air.getMainBody());
-        try self.dbgSetEpilogueBegin();
+
+        _ = try self.addInst(.{
+            .tag = .dbg_epilogue_begin,
+            .ops = undefined,
+            .data = undefined,
+        });
     }
 
     // Drop them off at the rbrace.
-    try self.dbgAdvancePCAndLine(self.end_di_line, self.end_di_column);
+    const payload = try self.addExtra(Mir.DbgLineColumn{
+        .line = self.end_di_line,
+        .column = self.end_di_column,
+    });
+    _ = try self.addInst(.{
+        .tag = .dbg_line,
+        .ops = undefined,
+        .data = .{ .payload = payload },
+    });
 }
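Side by side, the MIR emitted by gen() for a non-naked function corresponds one-for-one to the byte sequences deleted in this hunk; the machine code in the comments is taken from the removed direct-emission path:

    // MIR for the standard prologue/epilogue, with the bytes it replaces:
    //
    //   push { .reg1 = .rbp }                // 55                    push rbp
    //   mov  { .reg1 = .rsp, .reg2 = .rbp }  // 48 89 e5              mov rbp, rsp
    //   sub  { .reg1 = .rsp }, imm = 0       // 48 81 ec xx xx xx xx  sub rsp, imm32
    //   dbg_prologue_end
    //   ...function body...
    //   dbg_epilogue_begin
    //   add  { .reg1 = .rsp }, imm = frame   // 48 81 c4 xx xx xx xx  add rsp, imm32
    //   pop  { .reg1 = .rbp }                // 5d                    pop rbp
    //   ret  { .flags = 0b11 }               // c3                    ret
    //
    // The sub's placeholder immediate is backpatched once max_end_stack is
    // known, via mir_instructions.items(.data)[backpatch_reloc].imm.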
 
 fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
@@ -518,79 +596,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
     }
 }
 
-fn dbgSetPrologueEnd(self: *Self) InnerError!void {
-    switch (self.debug_output) {
-        .dwarf => |dbg_out| {
-            try dbg_out.dbg_line.append(DW.LNS.set_prologue_end);
-            try self.dbgAdvancePCAndLine(self.prev_di_line, self.prev_di_column);
-        },
-        .plan9 => {},
-        .none => {},
-    }
-}
-
-fn dbgSetEpilogueBegin(self: *Self) InnerError!void {
-    switch (self.debug_output) {
-        .dwarf => |dbg_out| {
-            try dbg_out.dbg_line.append(DW.LNS.set_epilogue_begin);
-            try self.dbgAdvancePCAndLine(self.prev_di_line, self.prev_di_column);
-        },
-        .plan9 => {},
-        .none => {},
-    }
-}
-
-fn dbgAdvancePCAndLine(self: *Self, line: u32, column: u32) InnerError!void {
-    const delta_line = @intCast(i32, line) - @intCast(i32, self.prev_di_line);
-    const delta_pc: usize = self.code.items.len - self.prev_di_pc;
-    switch (self.debug_output) {
-        .dwarf => |dbg_out| {
-            // TODO Look into using the DWARF special opcodes to compress this data.
-            // It lets you emit single-byte opcodes that add different numbers to
-            // both the PC and the line number at the same time.
-            try dbg_out.dbg_line.ensureUnusedCapacity(11);
-            dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.advance_pc);
-            leb128.writeULEB128(dbg_out.dbg_line.writer(), delta_pc) catch unreachable;
-            if (delta_line != 0) {
-                dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.advance_line);
-                leb128.writeILEB128(dbg_out.dbg_line.writer(), delta_line) catch unreachable;
-            }
-            dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.copy);
-            self.prev_di_pc = self.code.items.len;
-            self.prev_di_line = line;
-            self.prev_di_column = column;
-            self.prev_di_pc = self.code.items.len;
-        },
-        .plan9 => |dbg_out| {
-            if (delta_pc <= 0) return; // only do this when the pc changes
-            // we have already checked the target in the linker to make sure it is compatable
-            const quant = @import("../../link/Plan9/aout.zig").getPCQuant(self.target.cpu.arch) catch unreachable;
-
-            // increasing the line number
-            try @import("../../link/Plan9.zig").changeLine(dbg_out.dbg_line, delta_line);
-            // increasing the pc
-            const d_pc_p9 = @intCast(i64, delta_pc) - quant;
-            if (d_pc_p9 > 0) {
-                // minus one because if its the last one, we want to leave space to change the line which is one quanta
-                try dbg_out.dbg_line.append(@intCast(u8, @divExact(d_pc_p9, quant) + 128) - quant);
-                if (dbg_out.pcop_change_index.*) |pci|
-                    dbg_out.dbg_line.items[pci] += 1;
-                dbg_out.pcop_change_index.* = @intCast(u32, dbg_out.dbg_line.items.len - 1);
-            } else if (d_pc_p9 == 0) {
-                // we don't need to do anything, because adding the quant does it for us
-            } else unreachable;
-            if (dbg_out.start_line.* == null)
-                dbg_out.start_line.* = self.prev_di_line;
-            dbg_out.end_line.* = line;
-            // only do this if the pc changed
-            self.prev_di_line = line;
-            self.prev_di_column = column;
-            self.prev_di_pc = self.code.items.len;
-        },
-        .none => {},
-    }
-}
-
 /// Asserts there is already capacity to insert into top branch inst_table.
 fn processDeath(self: *Self, inst: Air.Inst.Index) void {
     const air_tags = self.air.instructions.items(.tag);
@@ -654,29 +659,6 @@ fn ensureProcessDeathCapacity(self: *Self, additional_count: usize) !void {
     try table.ensureUnusedCapacity(self.gpa, additional_count);
 }
 
-/// Adds a Type to the .debug_info at the current position. The bytes will be populated later,
-/// after codegen for this symbol is done.
-fn addDbgInfoTypeReloc(self: *Self, ty: Type) !void {
-    switch (self.debug_output) {
-        .dwarf => |dbg_out| {
-            assert(ty.hasCodeGenBits());
-            const index = dbg_out.dbg_info.items.len;
-            try dbg_out.dbg_info.resize(index + 4); // DW.AT.type,  DW.FORM.ref4
-
-            const gop = try dbg_out.dbg_info_type_relocs.getOrPut(self.gpa, ty);
-            if (!gop.found_existing) {
-                gop.value_ptr.* = .{
-                    .off = undefined,
-                    .relocs = .{},
-                };
-            }
-            try gop.value_ptr.relocs.append(self.gpa, @intCast(u32, index));
-        },
-        .plan9 => {},
-        .none => {},
-    }
-}
-
 fn allocMem(self: *Self, inst: Air.Inst.Index, abi_size: u32, abi_align: u32) !u32 {
     if (abi_align > self.stack_align)
         self.stack_align = abi_align;
@@ -848,7 +830,7 @@ fn airNot(self: *Self, inst: Air.Inst.Index) !void {
             },
             else => {},
         }
-        break :result try self.genX8664BinMath(inst, ty_op.operand, .bool_true);
+        break :result try self.genBinMathOp(inst, ty_op.operand, .bool_true);
     };
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
@@ -886,7 +868,7 @@ fn airAdd(self: *Self, inst: Air.Inst.Index) !void {
     const result: MCValue = if (self.liveness.isUnused(inst))
         .dead
     else
-        try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs);
+        try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs);
     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }
 
@@ -913,7 +895,7 @@ fn airSub(self: *Self, inst: Air.Inst.Index) !void {
     const result: MCValue = if (self.liveness.isUnused(inst))
         .dead
     else
-        try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs);
+        try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs);
     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }
 
@@ -940,7 +922,7 @@ fn airMul(self: *Self, inst: Air.Inst.Index) !void {
     const result: MCValue = if (self.liveness.isUnused(inst))
         .dead
     else
-        try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs);
+        try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs);
     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }
 
@@ -994,7 +976,7 @@ fn airBitAnd(self: *Self, inst: Air.Inst.Index) !void {
     const result: MCValue = if (self.liveness.isUnused(inst))
         .dead
     else
-        try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs);
+        try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs);
     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }
 
@@ -1003,7 +985,7 @@ fn airBitOr(self: *Self, inst: Air.Inst.Index) !void {
     const result: MCValue = if (self.liveness.isUnused(inst))
         .dead
     else
-        try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs);
+        try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs);
     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }
 
@@ -1415,7 +1397,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
 /// Perform "binary" operators, excluding comparisons.
 /// Currently, the following ops are supported:
 /// ADD, SUB, XOR, OR, AND
-fn genX8664BinMath(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_rhs: Air.Inst.Ref) !MCValue {
+fn genBinMathOp(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_rhs: Air.Inst.Ref) !MCValue {
     // We'll handle these ops in two steps.
     // 1) Prepare an output location (register or memory)
     //    This location will be the location of the operand that dies (if one exists)
@@ -1425,9 +1407,6 @@ fn genX8664BinMath(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_r
     //    In this case, copy that location to a register, then perform the op to that register instead.
     //
     // TODO: make this algorithm less bad
-
-    try self.code.ensureUnusedCapacity(8);
-
     const lhs = try self.resolveInst(op_lhs);
     const rhs = try self.resolveInst(op_rhs);
 
@@ -1486,107 +1465,28 @@ fn genX8664BinMath(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_r
         else => {},
     }
 
-    // Now for step 2, we perform the actual op
-    const inst_ty = self.air.typeOfIndex(inst);
+    // Now for step 2, we assign an MIR instruction
+    const dst_ty = self.air.typeOfIndex(inst);
     const air_tags = self.air.instructions.items(.tag);
     switch (air_tags[inst]) {
-        // TODO: Generate wrapping and non-wrapping versions separately
-        .add, .addwrap => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 0, 0x00),
-        .bool_or, .bit_or => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 1, 0x08),
-        .bool_and, .bit_and => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 4, 0x20),
-        .sub, .subwrap => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 5, 0x28),
-        .xor, .not => try self.genX8664BinMathCode(inst_ty, dst_mcv, src_mcv, 6, 0x30),
-
-        .mul, .mulwrap => try self.genX8664Imul(inst_ty, dst_mcv, src_mcv),
+        .add, .addwrap => try self.genBinMathOpMir(.add, dst_ty, dst_mcv, src_mcv),
+        .bool_or, .bit_or => try self.genBinMathOpMir(.@"or", dst_ty, dst_mcv, src_mcv),
+        .bool_and, .bit_and => try self.genBinMathOpMir(.@"and", dst_ty, dst_mcv, src_mcv),
+        .sub, .subwrap => try self.genBinMathOpMir(.sub, dst_ty, dst_mcv, src_mcv),
+        .xor, .not => try self.genBinMathOpMir(.xor, dst_ty, dst_mcv, src_mcv),
+        .mul, .mulwrap => try self.genIMulOpMir(dst_ty, dst_mcv, src_mcv),
         else => unreachable,
     }
 
     return dst_mcv;
 }
 
-/// Wrap over Instruction.encodeInto to translate errors
-fn encodeX8664Instruction(self: *Self, inst: Instruction) !void {
-    inst.encodeInto(self.code) catch |err| {
-        if (err == error.OutOfMemory)
-            return error.OutOfMemory
-        else
-            return self.fail("Instruction.encodeInto failed because {s}", .{@errorName(err)});
-    };
-}
-
-/// This function encodes a binary operation for x86_64
-/// intended for use with the following opcode ranges
-/// because they share the same structure.
-///
-/// Thus not all binary operations can be used here
-/// -- multiplication needs to be done with imul,
-/// which doesn't have as convenient an interface.
-///
-/// "opx"-style instructions use the opcode extension field to indicate which instruction to execute:
-///
-/// opx = /0: add
-/// opx = /1: or
-/// opx = /2: adc
-/// opx = /3: sbb
-/// opx = /4: and
-/// opx = /5: sub
-/// opx = /6: xor
-/// opx = /7: cmp
-///
-/// opcode  | operand shape
-/// --------+----------------------
-/// 80 /opx | *r/m8*,        imm8
-/// 81 /opx | *r/m16/32/64*, imm16/32
-/// 83 /opx | *r/m16/32/64*, imm8
-///
-/// "mr"-style instructions use the low bits of opcode to indicate shape of instruction:
-///
-/// mr = 00: add
-/// mr = 08: or
-/// mr = 10: adc
-/// mr = 18: sbb
-/// mr = 20: and
-/// mr = 28: sub
-/// mr = 30: xor
-/// mr = 38: cmp
-///
-/// opcode | operand shape
-/// -------+-------------------------
-/// mr + 0 | *r/m8*,        r8
-/// mr + 1 | *r/m16/32/64*, r16/32/64
-/// mr + 2 | *r8*,          r/m8
-/// mr + 3 | *r16/32/64*,   r/m16/32/64
-/// mr + 4 | *AL*,          imm8
-/// mr + 5 | *rAX*,         imm16/32
-///
-/// TODO: rotates and shifts share the same structure, so we can potentially implement them
-///       at a later date with very similar code.
-///       They have "opx"-style instructions, but no "mr"-style instructions.
-///
-/// opx = /0: rol,
-/// opx = /1: ror,
-/// opx = /2: rcl,
-/// opx = /3: rcr,
-/// opx = /4: shl sal,
-/// opx = /5: shr,
-/// opx = /6: sal shl,
-/// opx = /7: sar,
-///
-/// opcode  | operand shape
-/// --------+------------------
-/// c0 /opx | *r/m8*,        imm8
-/// c1 /opx | *r/m16/32/64*, imm8
-/// d0 /opx | *r/m8*,        1
-/// d1 /opx | *r/m16/32/64*, 1
-/// d2 /opx | *r/m8*,        CL    (for context, CL is register 1)
-/// d3 /opx | *r/m16/32/64*, CL    (for context, CL is register 1)
-fn genX8664BinMathCode(
+fn genBinMathOpMir(
     self: *Self,
+    mir_tag: Mir.Inst.Tag,
     dst_ty: Type,
     dst_mcv: MCValue,
     src_mcv: MCValue,
-    opx: u3,
-    mr: u8,
 ) !void {
     switch (dst_mcv) {
         .none => unreachable,
@@ -1604,84 +1504,43 @@ fn genX8664BinMathCode(
                 .ptr_stack_offset => unreachable,
                 .ptr_embedded_in_code => unreachable,
                 .register => |src_reg| {
-                    // for register, register use mr + 1
-                    // addressing mode: *r/m16/32/64*, r16/32/64
-                    const abi_size = dst_ty.abiSize(self.target.*);
-                    const encoder = try Encoder.init(self.code, 3);
-                    encoder.rex(.{
-                        .w = abi_size == 8,
-                        .r = src_reg.isExtended(),
-                        .b = dst_reg.isExtended(),
+                    _ = try self.addInst(.{
+                        .tag = mir_tag,
+                        .ops = (Mir.Ops{
+                            .reg1 = src_reg,
+                            .reg2 = dst_reg,
+                            .flags = 0b11,
+                        }).encode(),
+                        .data = undefined,
                     });
-                    encoder.opcode_1byte(mr + 1);
-                    encoder.modRm_direct(
-                        src_reg.low_id(),
-                        dst_reg.low_id(),
-                    );
                 },
                 .immediate => |imm| {
-                    // register, immediate use opx = 81 or 83 addressing modes:
-                    // opx = 81: r/m16/32/64, imm16/32
-                    // opx = 83: r/m16/32/64, imm8
-                    const imm32 = @intCast(i32, imm); // This case must be handled before calling genX8664BinMathCode.
-                    if (imm32 <= math.maxInt(i8)) {
-                        const abi_size = dst_ty.abiSize(self.target.*);
-                        const encoder = try Encoder.init(self.code, 4);
-                        encoder.rex(.{
-                            .w = abi_size == 8,
-                            .b = dst_reg.isExtended(),
-                        });
-                        encoder.opcode_1byte(0x83);
-                        encoder.modRm_direct(
-                            opx,
-                            dst_reg.low_id(),
-                        );
-                        encoder.imm8(@intCast(i8, imm32));
-                    } else {
-                        const abi_size = dst_ty.abiSize(self.target.*);
-                        const encoder = try Encoder.init(self.code, 7);
-                        encoder.rex(.{
-                            .w = abi_size == 8,
-                            .b = dst_reg.isExtended(),
-                        });
-                        encoder.opcode_1byte(0x81);
-                        encoder.modRm_direct(
-                            opx,
-                            dst_reg.low_id(),
-                        );
-                        encoder.imm32(@intCast(i32, imm32));
-                    }
+                    _ = try self.addInst(.{
+                        .tag = mir_tag,
+                        .ops = (Mir.Ops{
+                            .reg1 = dst_reg,
+                        }).encode(),
+                        .data = .{ .imm = @intCast(i32, imm) },
+                    });
                 },
                 .embedded_in_code, .memory => {
                     return self.fail("TODO implement x86 ADD/SUB/CMP source memory", .{});
                 },
                 .stack_offset => |off| {
-                    // register, indirect use mr + 3
-                    // addressing mode: *r16/32/64*, r/m16/32/64
-                    const abi_size = dst_ty.abiSize(self.target.*);
-                    const adj_off = off + abi_size;
                     if (off > math.maxInt(i32)) {
                         return self.fail("stack offset too large", .{});
                     }
-                    const encoder = try Encoder.init(self.code, 7);
-                    encoder.rex(.{
-                        .w = abi_size == 8,
-                        .r = dst_reg.isExtended(),
+                    const abi_size = dst_ty.abiSize(self.target.*);
+                    const adj_off = off + abi_size;
+                    _ = try self.addInst(.{
+                        .tag = mir_tag,
+                        .ops = (Mir.Ops{
+                            .reg1 = dst_reg,
+                            .reg2 = .ebp,
+                            .flags = 0b01,
+                        }).encode(),
+                        .data = .{ .imm = -@intCast(i32, adj_off) },
                     });
-                    encoder.opcode_1byte(mr + 3);
-                    if (adj_off <= std.math.maxInt(i8)) {
-                        encoder.modRm_indirectDisp8(
-                            dst_reg.low_id(),
-                            Register.ebp.low_id(),
-                        );
-                        encoder.disp8(-@intCast(i8, adj_off));
-                    } else {
-                        encoder.modRm_indirectDisp32(
-                            dst_reg.low_id(),
-                            Register.ebp.low_id(),
-                        );
-                        encoder.disp32(-@intCast(i32, adj_off));
-                    }
                 },
                 .compare_flags_unsigned => {
                     return self.fail("TODO implement x86 ADD/SUB/CMP source compare flag (unsigned)", .{});
@@ -1699,7 +1558,20 @@ fn genX8664BinMathCode(
                 .ptr_stack_offset => unreachable,
                 .ptr_embedded_in_code => unreachable,
                 .register => |src_reg| {
-                    try self.genX8664ModRMRegToStack(dst_ty, off, src_reg, mr + 0x1);
+                    if (off > math.maxInt(i32)) {
+                        return self.fail("stack offset too large", .{});
+                    }
+                    const abi_size = dst_ty.abiSize(self.target.*);
+                    const adj_off = off + abi_size;
+                    _ = try self.addInst(.{
+                        .tag = mir_tag,
+                        .ops = (Mir.Ops{
+                            .reg1 = src_reg,
+                            .reg2 = .ebp,
+                            .flags = 0b10,
+                        }).encode(),
+                        .data = .{ .imm = -@intCast(i32, adj_off) },
+                    });
                 },
                 .immediate => |imm| {
                     _ = imm;
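Taken together, the cases rewritten above fix the addressing forms these shared-encoding ALU instructions use in MIR; the flag values below are read straight off the code in this hunk (the arrow notation is just shorthand):

    // Ops.flags selects the operand form for add/or/adc/sbb/and/sub/xor/cmp:
    //
    //   reg1 set, data.imm         reg1 <- reg1 OP imm                 (register, immediate)
    //   flags = 0b11               reg2 <- reg2 OP reg1                (register, register)
    //   flags = 0b01, reg2 = rbp   reg1 <- reg1 OP [rbp + imm]         (register, stack slot)
    //   flags = 0b10, reg2 = rbp   [rbp + imm] <- [rbp + imm] OP reg1  (stack slot, register)
    //
    // imm is negative here (-adj_off), addressing below the frame pointer.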
@@ -1722,13 +1594,8 @@ fn genX8664BinMathCode(
     }
 }
 
-/// Performs integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
-fn genX8664Imul(
-    self: *Self,
-    dst_ty: Type,
-    dst_mcv: MCValue,
-    src_mcv: MCValue,
-) !void {
+/// Performs integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
+fn genIMulOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void {
     switch (dst_mcv) {
         .none => unreachable,
         .undef => unreachable,
@@ -1746,68 +1613,30 @@ fn genX8664Imul(
                 .ptr_embedded_in_code => unreachable,
                 .register => |src_reg| {
                     // register, register
-                    //
-                    // Use the following imul opcode
-                    // 0F AF /r: IMUL r32/64, r/m32/64
-                    const abi_size = dst_ty.abiSize(self.target.*);
-                    const encoder = try Encoder.init(self.code, 4);
-                    encoder.rex(.{
-                        .w = abi_size == 8,
-                        .r = dst_reg.isExtended(),
-                        .b = src_reg.isExtended(),
+                    _ = try self.addInst(.{
+                        .tag = .imul_complex,
+                        .ops = (Mir.Ops{
+                            .reg1 = dst_reg,
+                            .reg2 = src_reg,
+                        }).encode(),
+                        .data = undefined,
                     });
-                    encoder.opcode_2byte(0x0f, 0xaf);
-                    encoder.modRm_direct(
-                        dst_reg.low_id(),
-                        src_reg.low_id(),
-                    );
                 },
                 .immediate => |imm| {
-                    // register, immediate:
-                    // depends on size of immediate.
-                    //
-                    // immediate fits in i8:
-                    // 6B /r ib: IMUL r32/64, r/m32/64, imm8
-                    //
-                    // immediate fits in i32:
-                    // 69 /r id: IMUL r32/64, r/m32/64, imm32
-                    //
-                    // immediate is huge:
-                    // split into 2 instructions
-                    // 1) copy the 64 bit immediate into a tmp register
-                    // 2) perform register,register mul
-                    // 0F AF /r: IMUL r32/64, r/m32/64
-                    if (math.minInt(i8) <= imm and imm <= math.maxInt(i8)) {
-                        const abi_size = dst_ty.abiSize(self.target.*);
-                        const encoder = try Encoder.init(self.code, 4);
-                        encoder.rex(.{
-                            .w = abi_size == 8,
-                            .r = dst_reg.isExtended(),
-                            .b = dst_reg.isExtended(),
+                    // register, immediate
+                    if (imm <= math.maxInt(i32)) {
+                        _ = try self.addInst(.{
+                            .tag = .imul_complex,
+                            .ops = (Mir.Ops{
+                                .reg1 = dst_reg,
+                                .reg2 = dst_reg,
+                                .flags = 0b10,
+                            }).encode(),
+                            .data = .{ .imm = @intCast(i32, imm) },
                         });
-                        encoder.opcode_1byte(0x6B);
-                        encoder.modRm_direct(
-                            dst_reg.low_id(),
-                            dst_reg.low_id(),
-                        );
-                        encoder.imm8(@intCast(i8, imm));
-                    } else if (math.minInt(i32) <= imm and imm <= math.maxInt(i32)) {
-                        const abi_size = dst_ty.abiSize(self.target.*);
-                        const encoder = try Encoder.init(self.code, 7);
-                        encoder.rex(.{
-                            .w = abi_size == 8,
-                            .r = dst_reg.isExtended(),
-                            .b = dst_reg.isExtended(),
-                        });
-                        encoder.opcode_1byte(0x69);
-                        encoder.modRm_direct(
-                            dst_reg.low_id(),
-                            dst_reg.low_id(),
-                        );
-                        encoder.imm32(@intCast(i32, imm));
                     } else {
                         const src_reg = try self.copyToTmpRegister(dst_ty, src_mcv);
-                        return self.genX8664Imul(dst_ty, dst_mcv, MCValue{ .register = src_reg });
+                        return self.genIMulOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg });
                     }
                 },
                 .embedded_in_code, .memory, .stack_offset => {
@@ -1833,20 +1662,14 @@ fn genX8664Imul(
                     const dst_reg = try self.copyToTmpRegister(dst_ty, dst_mcv);
                     // multiply into dst_reg
                     // register, register
-                    // Use the following imul opcode
-                    // 0F AF /r: IMUL r32/64, r/m32/64
-                    const abi_size = dst_ty.abiSize(self.target.*);
-                    const encoder = try Encoder.init(self.code, 4);
-                    encoder.rex(.{
-                        .w = abi_size == 8,
-                        .r = dst_reg.isExtended(),
-                        .b = src_reg.isExtended(),
+                    _ = try self.addInst(.{
+                        .tag = .imul_complex,
+                        .ops = (Mir.Ops{
+                            .reg1 = dst_reg,
+                            .reg2 = src_reg,
+                        }).encode(),
+                        .data = undefined,
                     });
-                    encoder.opcode_2byte(0x0f, 0xaf);
-                    encoder.modRm_direct(
-                        dst_reg.low_id(),
-                        src_reg.low_id(),
-                    );
                     // copy dst_reg back out
                     return self.genSetStack(dst_ty, off, MCValue{ .register = dst_reg });
                 },
@@ -1871,73 +1694,6 @@ fn genX8664Imul(
     }
 }
 
-fn genX8664ModRMRegToStack(self: *Self, ty: Type, off: u32, reg: Register, opcode: u8) !void {
-    const abi_size = ty.abiSize(self.target.*);
-    const adj_off = off + abi_size;
-    if (off > math.maxInt(i32)) {
-        return self.fail("stack offset too large", .{});
-    }
-
-    const i_adj_off = -@intCast(i32, adj_off);
-    const encoder = try Encoder.init(self.code, 7);
-    encoder.rex(.{
-        .w = abi_size == 8,
-        .r = reg.isExtended(),
-    });
-    encoder.opcode_1byte(opcode);
-    if (i_adj_off < std.math.maxInt(i8)) {
-        // example: 48 89 55 7f           mov    QWORD PTR [rbp+0x7f],rdx
-        encoder.modRm_indirectDisp8(
-            reg.low_id(),
-            Register.ebp.low_id(),
-        );
-        encoder.disp8(@intCast(i8, i_adj_off));
-    } else {
-        // example: 48 89 95 80 00 00 00  mov    QWORD PTR [rbp+0x80],rdx
-        encoder.modRm_indirectDisp32(
-            reg.low_id(),
-            Register.ebp.low_id(),
-        );
-        encoder.disp32(i_adj_off);
-    }
-}
-
-fn genArgDbgInfo(self: *Self, inst: Air.Inst.Index, mcv: MCValue) !void {
-    const ty_str = self.air.instructions.items(.data)[inst].ty_str;
-    const zir = &self.mod_fn.owner_decl.getFileScope().zir;
-    const name = zir.nullTerminatedString(ty_str.str);
-    const name_with_null = name.ptr[0 .. name.len + 1];
-    const ty = self.air.getRefType(ty_str.ty);
-
-    switch (mcv) {
-        .register => |reg| {
-            switch (self.debug_output) {
-                .dwarf => |dbg_out| {
-                    try dbg_out.dbg_info.ensureUnusedCapacity(3);
-                    dbg_out.dbg_info.appendAssumeCapacity(link.File.Elf.abbrev_parameter);
-                    dbg_out.dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc
-                        1, // ULEB128 dwarf expression length
-                        reg.dwarfLocOp(),
-                    });
-                    try dbg_out.dbg_info.ensureUnusedCapacity(5 + name_with_null.len);
-                    try self.addDbgInfoTypeReloc(ty); // DW.AT.type,  DW.FORM.ref4
-                    dbg_out.dbg_info.appendSliceAssumeCapacity(name_with_null); // DW.AT.name, DW.FORM.string
-                },
-                .plan9 => {},
-                .none => {},
-            }
-        },
-        .stack_offset => {
-            switch (self.debug_output) {
-                .dwarf => {},
-                .plan9 => {},
-                .none => {},
-            }
-        },
-        else => {},
-    }
-}
-
 fn airArg(self: *Self, inst: Air.Inst.Index) !void {
     const arg_index = self.arg_index;
     self.arg_index += 1;
@@ -1946,8 +1702,15 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void {
     _ = ty;
 
     const mcv = self.args[arg_index];
-    try self.genArgDbgInfo(inst, mcv);
-
+    const payload = try self.addExtra(Mir.ArgDbgInfo{
+        .air_inst = inst,
+        .arg_index = @intCast(u32, arg_index), // TODO: can arg_index just be u32?
+    });
+    _ = try self.addInst(.{
+        .tag = .arg_dbg_info,
+        .ops = undefined,
+        .data = .{ .payload = payload },
+    });
     if (self.liveness.isUnused(inst))
         return self.finishAirBookkeeping();
 
@@ -1962,7 +1725,11 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void {
 }
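The deleted genArgDbgInfo above does not disappear: per the commit message, it moves into Emit, keyed off the new `.arg_dbg_info` instruction whose `Mir.ArgDbgInfo{ .air_inst, .arg_index }` payload airArg writes here. A hedged sketch of the consuming side, assuming Mir exposes the usual `extraData` counterpart to `addExtra` (not shown in this excerpt):

    // Hypothetical sketch: how Emit might read the payload back.
    // extraData is assumed to mirror addExtraAssumeCapacity's field order.
    fn mirArgDbgInfo(emit: *Emit, inst: Mir.Inst.Index) !void {
        const payload = emit.mir.instructions.items(.data)[inst].payload;
        const arg = emit.mir.extraData(Mir.ArgDbgInfo, payload).data;
        // arg.air_inst / arg.arg_index drive the DWARF parameter emission
        // that the deleted genArgDbgInfo used to do inline in CodeGen.
        _ = arg;
    }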
 
 fn airBreakpoint(self: *Self) !void {
-    try self.code.append(0xcc); // int3
+    _ = try self.addInst(.{
+        .tag = .brk,
+        .ops = undefined,
+        .data = undefined,
+    });
     return self.finishAirBookkeeping();
 }
 
@@ -2021,7 +1788,6 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void {
         if (self.air.value(callee)) |func_value| {
             if (func_value.castTag(.function)) |func_payload| {
                 const func = func_payload.data;
-
                 const ptr_bits = self.target.cpu.arch.ptrBitWidth();
                 const ptr_bytes: u64 = @divExact(ptr_bits, 8);
                 const got_addr = if (self.bin_file.cast(link.File.Elf)) |elf_file| blk: {
@@ -2031,11 +1797,13 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void {
                     @intCast(u32, coff_file.offset_table_virtual_address + func.owner_decl.link.coff.offset_table_index * ptr_bytes)
                 else
                     unreachable;
-
-                // ff 14 25 xx xx xx xx    call [addr]
-                try self.code.ensureUnusedCapacity(7);
-                self.code.appendSliceAssumeCapacity(&[3]u8{ 0xff, 0x14, 0x25 });
-                mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), got_addr);
+                _ = try self.addInst(.{
+                    .tag = .call,
+                    .ops = (Mir.Ops{
+                        .flags = 0b01,
+                    }).encode(),
+                    .data = .{ .imm = @bitCast(i32, got_addr) },
+                });
             } else if (func_value.castTag(.extern_fn)) |_| {
                 return self.fail("TODO implement calling extern functions", .{});
             } else {
@@ -2089,26 +1857,21 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void {
                     .memory = func.owner_decl.link.macho.local_sym_index,
                 });
                 // callq *%rax
-                try self.code.ensureUnusedCapacity(2);
-                self.code.appendSliceAssumeCapacity(&[2]u8{ 0xff, 0xd0 });
+                _ = try self.addInst(.{
+                    .tag = .call,
+                    .ops = (Mir.Ops{
+                        .reg1 = .rax,
+                        .flags = 0b01,
+                    }).encode(),
+                    .data = undefined,
+                });
             } else if (func_value.castTag(.extern_fn)) |func_payload| {
                 const decl = func_payload.data;
                 const n_strx = try macho_file.addExternFn(mem.spanZ(decl.name));
-                const offset = blk: {
-                    // callq
-                    try self.code.ensureUnusedCapacity(5);
-                    self.code.appendSliceAssumeCapacity(&[5]u8{ 0xe8, 0x0, 0x0, 0x0, 0x0 });
-                    break :blk @intCast(u32, self.code.items.len) - 4;
-                };
-                // Add relocation to the decl.
-                try macho_file.active_decl.?.link.macho.relocs.append(self.bin_file.allocator, .{
-                    .offset = offset,
-                    .target = .{ .global = n_strx },
-                    .addend = 0,
-                    .subtractor = null,
-                    .pcrel = true,
-                    .length = 2,
-                    .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_BRANCH),
+                _ = try self.addInst(.{
+                    .tag = .call_extern,
+                    .ops = undefined,
+                    .data = .{ .extern_fn = n_strx },
                 });
             } else {
                 return self.fail("TODO implement calling bitcasted functions", .{});
@@ -2157,11 +1920,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void {
                 const ptr_bytes: u64 = @divExact(ptr_bits, 8);
                 const got_addr = p9.bases.data;
                 const got_index = func_payload.data.owner_decl.link.plan9.got_index.?;
-                // ff 14 25 xx xx xx xx    call [addr]
-                try self.code.ensureUnusedCapacity(7);
-                self.code.appendSliceAssumeCapacity(&[3]u8{ 0xff, 0x14, 0x25 });
                 const fn_got_addr = got_addr + got_index * ptr_bytes;
-                mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), @intCast(u32, fn_got_addr));
+                _ = try self.addInst(.{
+                    .tag = .call,
+                    .ops = (Mir.Ops{
+                        .flags = 0b01,
+                    }).encode(),
+                    .data = .{ .imm = @bitCast(i32, @intCast(u32, fn_got_addr)) },
+                });
             } else return self.fail("TODO implement calling extern fn on plan9", .{});
         } else {
             return self.fail("TODO implement calling runtime known function pointer", .{});
@@ -2201,9 +1967,14 @@ fn ret(self: *Self, mcv: MCValue) !void {
     // TODO when implementing defer, this will need to jump to the appropriate defer expression.
     // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
     // which is available if the jump is 127 bytes or less forward.
-    try self.code.resize(self.code.items.len + 5);
-    self.code.items[self.code.items.len - 5] = 0xe9; // jmp rel32
-    try self.exitlude_jump_relocs.append(self.gpa, self.code.items.len - 4);
+    const jmp_reloc = try self.addInst(.{
+        .tag = .jmp,
+        .ops = (Mir.Ops{
+            .flags = 0b00,
+        }).encode(),
+        .data = .{ .inst = undefined },
+    });
+    try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc);
 }
 
 fn airRet(self: *Self, inst: Air.Inst.Index) !void {
@@ -2233,8 +2004,6 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
     const lhs = try self.resolveInst(bin_op.lhs);
     const rhs = try self.resolveInst(bin_op.rhs);
     const result: MCValue = result: {
-        try self.code.ensureUnusedCapacity(8);
-
         // There are 2 operands, destination and source.
         // Either one, but not both, can be a memory operand.
         // Source operand can be an immediate, 8 bits or 32 bits.
@@ -2245,7 +2014,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
         // This instruction supports only signed 32-bit immediates at most.
         const src_mcv = try self.limitImmediateType(bin_op.rhs, i32);
 
-        try self.genX8664BinMathCode(Type.initTag(.bool), dst_mcv, src_mcv, 7, 0x38);
+        try self.genBinMathOpMir(.cmp, Type.initTag(.bool), dst_mcv, src_mcv);
         break :result switch (ty.isSignedInt()) {
             true => MCValue{ .compare_flags_signed = op },
             false => MCValue{ .compare_flags_unsigned = op },
@@ -2256,7 +2025,15 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
 
 fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void {
     const dbg_stmt = self.air.instructions.items(.data)[inst].dbg_stmt;
-    try self.dbgAdvancePCAndLine(dbg_stmt.line, dbg_stmt.column);
+    const payload = try self.addExtra(Mir.DbgLineColumn{
+        .line = dbg_stmt.line,
+        .column = dbg_stmt.column,
+    });
+    _ = try self.addInst(.{
+        .tag = .dbg_line,
+        .ops = undefined,
+        .data = .{ .payload = payload },
+    });
     return self.finishAirBookkeeping();
 }
 
@@ -2268,58 +2045,77 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void {
     const else_body = self.air.extra[extra.end + then_body.len ..][0..extra.data.else_body_len];
     const liveness_condbr = self.liveness.getCondBr(inst);
 
-    const reloc: Reloc = reloc: {
-        try self.code.ensureUnusedCapacity(6);
-
-        const opcode: u8 = switch (cond) {
-            .compare_flags_signed => |cmp_op| blk: {
-                // Here we map to the opposite opcode because the jump is to the false branch.
-                const opcode: u8 = switch (cmp_op) {
-                    .gte => 0x8c,
-                    .gt => 0x8e,
-                    .neq => 0x84,
-                    .lt => 0x8d,
-                    .lte => 0x8f,
-                    .eq => 0x85,
+    const reloc: Mir.Inst.Index = reloc: {
+        switch (cond) {
+            .compare_flags_signed => |cmp_op| {
+                // Here we map the opposites since the jump is to the false branch.
+                const flags: u2 = switch (cmp_op) {
+                    .gte => 0b10,
+                    .gt => 0b11,
+                    .neq => 0b01,
+                    .lt => 0b00,
+                    .lte => 0b01,
+                    .eq => 0b00,
                 };
-                break :blk opcode;
+                const tag: Mir.Inst.Tag = if (cmp_op == .neq or cmp_op == .eq)
+                    .cond_jmp_eq_ne
+                else
+                    .cond_jmp_greater_less;
+                const reloc = try self.addInst(.{
+                    .tag = tag,
+                    .ops = (Mir.Ops{
+                        .flags = flags,
+                    }).encode(),
+                    .data = .{ .inst = undefined },
+                });
+                break :reloc reloc;
             },
-            .compare_flags_unsigned => |cmp_op| blk: {
-                // Here we map to the opposite opcode because the jump is to the false branch.
-                const opcode: u8 = switch (cmp_op) {
-                    .gte => 0x82,
-                    .gt => 0x86,
-                    .neq => 0x84,
-                    .lt => 0x83,
-                    .lte => 0x87,
-                    .eq => 0x85,
+            .compare_flags_unsigned => |cmp_op| {
+                // Here we map the opposites since the jump is to the false branch.
+                const flags: u2 = switch (cmp_op) {
+                    .gte => 0b10,
+                    .gt => 0b11,
+                    .neq => 0b01,
+                    .lt => 0b00,
+                    .lte => 0b01,
+                    .eq => 0b00,
                 };
-                break :blk opcode;
+                const tag: Mir.Inst.Tag = if (cmp_op == .neq or cmp_op == .eq)
+                    .cond_jmp_eq_ne
+                else
+                    .cond_jmp_above_below;
+                const reloc = try self.addInst(.{
+                    .tag = tag,
+                    .ops = (Mir.Ops{
+                        .flags = flags,
+                    }).encode(),
+                    .data = .{ .inst = undefined },
+                });
+                break :reloc reloc;
             },
-            .register => |reg| blk: {
-                // test reg, 1
-                // TODO detect al, ax, eax
-                const encoder = try Encoder.init(self.code, 4);
-                encoder.rex(.{
-                    // TODO audit this codegen: we force w = true here to make
-                    // the value affect the big register
-                    .w = true,
-                    .b = reg.isExtended(),
+            .register => |reg| {
+                _ = try self.addInst(.{
+                    .tag = .@"test",
+                    .ops = (Mir.Ops{
+                        .reg1 = reg,
+                        .flags = 0b00,
+                    }).encode(),
+                    .data = .{ .imm = 1 },
+                });
+                const reloc = try self.addInst(.{
+                    .tag = .cond_jmp_eq_ne,
+                    .ops = (Mir.Ops{
+                        .flags = 0b01,
+                    }).encode(),
+                    .data = .{ .inst = undefined },
                 });
-                encoder.opcode_1byte(0xf6);
-                encoder.modRm_direct(
-                    0,
-                    reg.low_id(),
-                );
-                encoder.disp8(1);
-                break :blk 0x84;
+                break :reloc reloc;
             },
-            else => return self.fail("TODO implement condbr {s} when condition is {s}", .{ self.target.cpu.arch, @tagName(cond) }),
-        };
-        self.code.appendSliceAssumeCapacity(&[_]u8{ 0x0f, opcode });
-        const reloc = Reloc{ .rel32 = self.code.items.len };
-        self.code.items.len += 4;
-        break :reloc reloc;
+            else => return self.fail("TODO implement condbr {s} when condition is {s}", .{
+                self.target.cpu.arch,
+                @tagName(cond),
+            }),
+        }
     };
 
     // Capture the state of register and stack allocation state so that we can revert to it.
@@ -2578,25 +2374,18 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void {
     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
     const loop = self.air.extraData(Air.Block, ty_pl.payload);
     const body = self.air.extra[loop.end..][0..loop.data.body_len];
-    const start_index = self.code.items.len;
+    const jmp_target = @intCast(u32, self.mir_instructions.len);
     try self.genBody(body);
-    try self.jump(start_index);
+    _ = try self.addInst(.{
+        .tag = .jmp,
+        .ops = (Mir.Ops{
+            .flags = 0b00,
+        }).encode(),
+        .data = .{ .inst = jmp_target },
+    });
     return self.finishAirBookkeeping();
 }
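+// Note: the loop's backward jump targets an already-known MIR index
+// (`jmp_target`), so no relocation is needed; forward jumps store
+// `undefined` and are patched later via performReloc.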
 
-/// Send control flow to the `index` of `self.code`.
-fn jump(self: *Self, index: usize) !void {
-    try self.code.ensureUnusedCapacity(5);
-    if (math.cast(i8, @intCast(i32, index) - (@intCast(i32, self.code.items.len + 2)))) |delta| {
-        self.code.appendAssumeCapacity(0xeb); // jmp rel8
-        self.code.appendAssumeCapacity(@bitCast(u8, delta));
-    } else |_| {
-        const delta = @intCast(i32, index) - (@intCast(i32, self.code.items.len + 5));
-        self.code.appendAssumeCapacity(0xe9); // jmp rel32
-        mem.writeIntLittle(i32, self.code.addManyAsArrayAssumeCapacity(4), delta);
-    }
-}
-
 fn airBlock(self: *Self, inst: Air.Inst.Index) !void {
     try self.blocks.putNoClobber(self.gpa, inst, .{
         // A block is a setup to be able to jump to the end.
@@ -2630,22 +2419,9 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void {
     // return self.finishAir(inst, .dead, .{ condition, .none, .none });
 }
 
-fn performReloc(self: *Self, reloc: Reloc) !void {
-    switch (reloc) {
-        .rel32 => |pos| {
-            const amt = self.code.items.len - (pos + 4);
-            // Here it would be tempting to implement testing for amt == 0 and then elide the
-            // jump. However, that will cause a problem because other jumps may assume that they
-            // can jump to this code. Or maybe I didn't understand something when I was debugging.
-            // It could be worth another look. Anyway, that's why that isn't done here. Probably the
-            // best place to elide jumps will be in semantic analysis, by inlining blocks that only
-            // only have 1 break instruction.
-            const s32_amt = math.cast(i32, amt) catch
-                return self.fail("unable to perform relocation: jump too far", .{});
-            mem.writeIntLittle(i32, self.code.items[pos..][0..4], s32_amt);
-        },
-        .arm_branch => unreachable,
-    }
+fn performReloc(self: *Self, reloc: Mir.Inst.Index) !void {
+    const next_inst = @intCast(u32, self.mir_instructions.len);
+    self.mir_instructions.items(.data)[reloc].inst = next_inst;
 }
 
 fn airBr(self: *Self, inst: Air.Inst.Index) !void {
@@ -2661,9 +2437,9 @@ fn airBoolOp(self: *Self, inst: Air.Inst.Index) !void {
         .dead
     else switch (air_tags[inst]) {
         // lhs AND rhs
-        .bool_and => try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs),
+        .bool_and => try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs),
         // lhs OR rhs
-        .bool_or => try self.genX8664BinMath(inst, bin_op.lhs, bin_op.rhs),
+        .bool_or => try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs),
         else => unreachable, // Not a boolean operation
     };
     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
@@ -2688,12 +2464,15 @@ fn brVoid(self: *Self, block: Air.Inst.Index) !void {
     const block_data = self.blocks.getPtr(block).?;
     // Emit a jump with a relocation. It will be patched up after the block ends.
     try block_data.relocs.ensureUnusedCapacity(self.gpa, 1);
-    // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
-    // which is available if the jump is 127 bytes or less forward.
-    try self.code.resize(self.code.items.len + 5);
-    self.code.items[self.code.items.len - 5] = 0xe9; // jmp rel32
     // Leave the jump offset undefined
-    block_data.relocs.appendAssumeCapacity(.{ .rel32 = self.code.items.len - 4 });
+    const jmp_reloc = try self.addInst(.{
+        .tag = .jmp,
+        .ops = (Mir.Ops{
+            .flags = 0b00,
+        }).encode(),
+        .data = .{ .inst = undefined },
+    });
+    block_data.relocs.appendAssumeCapacity(jmp_reloc);
 }
 
 fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
@@ -2750,22 +2529,35 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
             var iter = std.mem.tokenize(u8, asm_source, "\n\r");
             while (iter.next()) |ins| {
                 if (mem.eql(u8, ins, "syscall")) {
-                    try self.code.appendSlice(&[_]u8{ 0x0f, 0x05 });
+                    _ = try self.addInst(.{
+                        .tag = .syscall,
+                        .ops = undefined,
+                        .data = undefined,
+                    });
                 } else if (mem.indexOf(u8, ins, "push")) |_| {
                     const arg = ins[4..];
                     if (mem.indexOf(u8, arg, "$")) |l| {
-                        const n = std.fmt.parseInt(u8, ins[4 + l + 1 ..], 10) catch return self.fail("TODO implement more inline asm int parsing", .{});
-                        try self.code.appendSlice(&.{ 0x6a, n });
+                        const n = std.fmt.parseInt(u8, ins[4 + l + 1 ..], 10) catch {
+                            return self.fail("TODO implement more inline asm int parsing", .{});
+                        };
+                        _ = try self.addInst(.{
+                            .tag = .push,
+                            .ops = (Mir.Ops{
+                                .flags = 0b10,
+                            }).encode(),
+                            .data = .{ .imm = n },
+                        });
                     } else if (mem.indexOf(u8, arg, "%%")) |l| {
                         const reg_name = ins[4 + l + 2 ..];
                         const reg = parseRegName(reg_name) orelse
                             return self.fail("unrecognized register: '{s}'", .{reg_name});
-                        const low_id: u8 = reg.low_id();
-                        if (reg.isExtended()) {
-                            try self.code.appendSlice(&.{ 0x41, 0b1010000 | low_id });
-                        } else {
-                            try self.code.append(0b1010000 | low_id);
-                        }
+                        _ = try self.addInst(.{
+                            .tag = .push,
+                            .ops = (Mir.Ops{
+                                .reg1 = reg,
+                            }).encode(),
+                            .data = undefined,
+                        });
                     } else return self.fail("TODO more push operands", .{});
                 } else if (mem.indexOf(u8, ins, "pop")) |_| {
                     const arg = ins[3..];
@@ -2773,12 +2565,13 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
                         const reg_name = ins[3 + l + 2 ..];
                         const reg = parseRegName(reg_name) orelse
                             return self.fail("unrecognized register: '{s}'", .{reg_name});
-                        const low_id: u8 = reg.low_id();
-                        if (reg.isExtended()) {
-                            try self.code.appendSlice(&.{ 0x41, 0b1011000 | low_id });
-                        } else {
-                            try self.code.append(0b1011000 | low_id);
-                        }
+                        _ = try self.addInst(.{
+                            .tag = .pop,
+                            .ops = (Mir.Ops{
+                                .reg1 = reg,
+                            }).encode(),
+                            .data = undefined,
+                        });
                     } else return self.fail("TODO more pop operands", .{});
                 } else {
                     return self.fail("TODO implement support for more x86 assembly instructions", .{});
@@ -2870,7 +2663,6 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
             if (adj_off > 128) {
                 return self.fail("TODO implement set stack variable with large stack offset", .{});
             }
-            try self.code.ensureUnusedCapacity(8);
             switch (abi_size) {
                 1 => {
                     return self.fail("TODO implement set abi_size=1 stack variable with immediate", .{});
@@ -2879,34 +2671,57 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
                     return self.fail("TODO implement set abi_size=2 stack variable with immediate", .{});
                 },
                 4 => {
-                    const x = @intCast(u32, x_big);
                     // We have a positive stack offset value but we want a twos complement negative
                     // offset from rbp, which is at the top of the stack frame.
-                    const negative_offset = @intCast(i8, -@intCast(i32, adj_off));
-                    const twos_comp = @bitCast(u8, negative_offset);
                     // mov    DWORD PTR [rbp+offset], immediate
-                    self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp });
-                    mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x);
+                    const payload = try self.addExtra(Mir.ImmPair{
+                        .dest_off = -@intCast(i32, adj_off),
+                        .operand = @bitCast(i32, @intCast(u32, x_big)),
+                    });
+                    _ = try self.addInst(.{
+                        .tag = .mov,
+                        .ops = (Mir.Ops{
+                            .reg1 = .rbp,
+                            .flags = 0b11,
+                        }).encode(),
+                        .data = .{ .payload = payload },
+                    });
                 },
                 8 => {
                     // We have a positive stack offset value but we want a twos complement negative
                     // offset from rbp, which is at the top of the stack frame.
-                    const negative_offset = @intCast(i8, -@intCast(i32, adj_off));
-                    const twos_comp = @bitCast(u8, negative_offset);
+                    const negative_offset = -@intCast(i32, adj_off);
 
                     // A 64-bit write to memory would take two movs anyway, so we
                     // instead just use two 32-bit writes to avoid register allocation.
-                    try self.code.ensureUnusedCapacity(14);
-                    var buf: [8]u8 = undefined;
-                    mem.writeIntLittle(u64, &buf, x_big);
-
-                    // mov    DWORD PTR [rbp+offset+4], immediate
-                    self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp + 4 });
-                    self.code.appendSliceAssumeCapacity(buf[4..8]);
-
-                    // mov    DWORD PTR [rbp+offset], immediate
-                    self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp });
-                    self.code.appendSliceAssumeCapacity(buf[0..4]);
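+                    // E.g. for x_big = 0xDEADBEEF_CAFEBABE this stores
+                    // 0xDEADBEEF at [rbp + offset + 4] and 0xCAFEBABE
+                    // at [rbp + offset].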
+                    {
+                        const payload = try self.addExtra(Mir.ImmPair{
+                            .dest_off = negative_offset + 4,
+                            .operand = @bitCast(i32, @truncate(u32, x_big >> 32)),
+                        });
+                        _ = try self.addInst(.{
+                            .tag = .mov,
+                            .ops = (Mir.Ops{
+                                .reg1 = .rbp,
+                                .flags = 0b11,
+                            }).encode(),
+                            .data = .{ .payload = payload },
+                        });
+                    }
+                    {
+                        const payload = try self.addExtra(Mir.ImmPair{
+                            .dest_off = negative_offset,
+                            .operand = @bitCast(i32, @truncate(u32, x_big)),
+                        });
+                        _ = try self.addInst(.{
+                            .tag = .mov,
+                            .ops = (Mir.Ops{
+                                .reg1 = .rbp,
+                                .flags = 0b11,
+                            }).encode(),
+                            .data = .{ .payload = payload },
+                        });
+                    }
                 },
                 else => {
                     return self.fail("TODO implement set abi_size=large stack variable with immediate", .{});
@@ -2920,7 +2735,20 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
             return self.genSetStack(ty, stack_offset, MCValue{ .register = reg });
         },
         .register => |reg| {
-            try self.genX8664ModRMRegToStack(ty, stack_offset, reg, 0x89);
+            if (stack_offset > math.maxInt(i32)) {
+                return self.fail("stack offset too large", .{});
+            }
+            const abi_size = ty.abiSize(self.target.*);
+            const adj_off = stack_offset + abi_size;
+            _ = try self.addInst(.{
+                .tag = .mov,
+                .ops = (Mir.Ops{
+                    .reg1 = reg,
+                    .reg2 = .ebp,
+                    .flags = 0b10,
+                }).encode(),
+                .data = .{ .imm = -@intCast(i32, adj_off) },
+            });
         },
         .memory => |vaddr| {
             _ = vaddr;
@@ -2958,25 +2786,26 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
             }
         },
         .compare_flags_unsigned => |op| {
-            const encoder = try Encoder.init(self.code, 7);
-            // TODO audit this codegen: we force w = true here to make
-            // the value affect the big register
-            encoder.rex(.{
-                .w = true,
-                .b = reg.isExtended(),
+            const tag: Mir.Inst.Tag = switch (op) {
+                .gte, .gt, .lt, .lte => .cond_set_byte_above_below,
+                .eq, .neq => .cond_set_byte_eq_ne,
+            };
+            const flags: u2 = switch (op) {
+                .gte => 0b00,
+                .gt => 0b01,
+                .lt => 0b10,
+                .lte => 0b11,
+                .eq => 0b01,
+                .neq => 0b00,
+            };
+            _ = try self.addInst(.{
+                .tag = tag,
+                .ops = (Mir.Ops{
+                    .reg1 = reg,
+                    .flags = flags,
+                }).encode(),
+                .data = undefined,
             });
-            encoder.opcode_2byte(0x0f, switch (op) {
-                .gte => 0x93,
-                .gt => 0x97,
-                .neq => 0x95,
-                .lt => 0x92,
-                .lte => 0x96,
-                .eq => 0x94,
-            });
-            encoder.modRm_direct(
-                0,
-                reg.low_id(),
-            );
         },
         .compare_flags_signed => |op| {
             _ = op;
@@ -2986,44 +2815,25 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
             // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit
             // register is the fastest way to zero a register.
             if (x == 0) {
-                // The encoding for `xor r32, r32` is `0x31 /r`.
-                const encoder = try Encoder.init(self.code, 3);
-
-                // If we're accessing e.g. r8d, we need to use a REX prefix before the actual operation. Since
-                // this is a 32-bit operation, the W flag is set to zero. X is also zero, as we're not using a SIB.
-                // Both R and B are set, as we're extending, in effect, the register bits *and* the operand.
-                encoder.rex(.{
-                    .r = reg.isExtended(),
-                    .b = reg.isExtended(),
+                _ = try self.addInst(.{
+                    .tag = .xor,
+                    .ops = (Mir.Ops{
+                        .reg1 = reg,
+                        .reg2 = reg,
+                    }).encode(),
+                    .data = undefined,
                 });
-                encoder.opcode_1byte(0x31);
-                // Section 3.1.1.1 of the Intel x64 Manual states that "/r indicates that the
-                // ModR/M byte of the instruction contains a register operand and an r/m operand."
-                encoder.modRm_direct(
-                    reg.low_id(),
-                    reg.low_id(),
-                );
-
                 return;
             }
             if (x <= math.maxInt(i32)) {
                 // Next best case: if we set the lower four bytes, the upper four will be zeroed.
-                //
-                // The encoding for `mov IMM32 -> REG` is (0xB8 + R) IMM.
-
-                const encoder = try Encoder.init(self.code, 6);
-                // Just as with XORing, we need a REX prefix. This time though, we only
-                // need the B bit set, as we're extending the opcode's register field,
-                // and there is no Mod R/M byte.
-                encoder.rex(.{
-                    .b = reg.isExtended(),
+                _ = try self.addInst(.{
+                    .tag = .mov,
+                    .ops = (Mir.Ops{
+                        .reg1 = reg,
+                    }).encode(),
+                    .data = .{ .imm = @intCast(i32, x) },
                 });
-                encoder.opcode_withReg(0xB8, reg.low_id());
-
-                // no ModR/M byte
-
-                // IMM
-                encoder.imm32(@intCast(i32, x));
                 return;
             }
             // Worst case: we need to load the 64-bit register with the IMM. GNU's assemblers calls
@@ -3033,137 +2843,87 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
             // This encoding is, in fact, the *same* as the one used for 32-bit loads. The only
             // difference is that we set REX.W before the instruction, which extends the load to
             // 64-bit and uses the full bit-width of the register.
-            {
-                const encoder = try Encoder.init(self.code, 10);
-                encoder.rex(.{
-                    .w = true,
-                    .b = reg.isExtended(),
-                });
-                encoder.opcode_withReg(0xB8, reg.low_id());
-                encoder.imm64(x);
-            }
+            const payload = try self.addExtra(Mir.Imm64.encode(x));
+            _ = try self.addInst(.{
+                .tag = .movabs,
+                .ops = (Mir.Ops{
+                    .reg1 = reg,
+                }).encode(),
+                .data = .{ .payload = payload },
+            });
         },
         .embedded_in_code => |code_offset| {
             // We need the offset from RIP in a signed i32 twos complement.
-            // The instruction is 7 bytes long and RIP points to the next instruction.
-
-            // 64-bit LEA is encoded as REX.W 8D /r.
-            const rip = self.code.items.len + 7;
-            const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip);
-            const offset = @intCast(i32, big_offset);
-            const encoder = try Encoder.init(self.code, 7);
-
-            // byte 1, always exists because w = true
-            encoder.rex(.{
-                .w = true,
-                .r = reg.isExtended(),
+            const payload = try self.addExtra(Mir.Imm64.encode(code_offset));
+            _ = try self.addInst(.{
+                .tag = .lea_rip,
+                .ops = (Mir.Ops{
+                    .reg1 = reg,
+                }).encode(),
+                .data = .{ .payload = payload },
             });
-            // byte 2
-            encoder.opcode_1byte(0x8D);
-            // byte 3
-            encoder.modRm_RIPDisp32(reg.low_id());
-            // byte 4-7
-            encoder.disp32(offset);
-
-            // Double check that we haven't done any math errors
-            assert(rip == self.code.items.len);
         },
         .register => |src_reg| {
             // If the registers are the same, nothing to do.
             if (src_reg.id() == reg.id())
                 return;
 
-            // This is a variant of 8B /r.
-            const abi_size = ty.abiSize(self.target.*);
-            const encoder = try Encoder.init(self.code, 3);
-            encoder.rex(.{
-                .w = abi_size == 8,
-                .r = reg.isExtended(),
-                .b = src_reg.isExtended(),
+            _ = try self.addInst(.{
+                .tag = .mov,
+                .ops = (Mir.Ops{
+                    .reg1 = reg,
+                    .reg2 = src_reg,
+                    .flags = 0b11,
+                }).encode(),
+                .data = undefined,
             });
-            encoder.opcode_1byte(0x8B);
-            encoder.modRm_direct(reg.low_id(), src_reg.low_id());
         },
         .memory => |x| {
+            // TODO can we move this entire logic into Emit.zig like with aarch64?
             if (self.bin_file.options.pie) {
-                // RIP-relative displacement to the entry in the GOT table.
-                const abi_size = ty.abiSize(self.target.*);
-                const encoder = try Encoder.init(self.code, 10);
-
-                // LEA reg, [<offset>]
-
-                // We encode the instruction FIRST because prefixes may or may not appear.
-                // After we encode the instruction, we will know that the displacement bytes
-                // for [<offset>] will be at self.code.items.len - 4.
-                encoder.rex(.{
-                    .w = true, // force 64 bit because loading an address (to the GOT)
-                    .r = reg.isExtended(),
+                // TODO we should flag `x` as a GOT symbol entry explicitly rather than as a hack.
+                _ = try self.addInst(.{
+                    .tag = .lea_rip,
+                    .ops = (Mir.Ops{
+                        .reg1 = reg,
+                        .flags = 0b01,
+                    }).encode(),
+                    .data = .{ .got_entry = @intCast(u32, x) },
                 });
-                encoder.opcode_1byte(0x8D);
-                encoder.modRm_RIPDisp32(reg.low_id());
-                encoder.disp32(0);
-
-                const offset = @intCast(u32, self.code.items.len);
-
-                if (self.bin_file.cast(link.File.MachO)) |macho_file| {
-                    // TODO I think the reloc might be in the wrong place.
-                    const decl = macho_file.active_decl.?;
-                    // Load reloc for LEA instruction.
-                    try decl.link.macho.relocs.append(self.bin_file.allocator, .{
-                        .offset = offset - 4,
-                        .target = .{ .local = @intCast(u32, x) },
-                        .addend = 0,
-                        .subtractor = null,
-                        .pcrel = true,
-                        .length = 2,
-                        .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_GOT),
-                    });
-                } else {
-                    return self.fail("TODO implement genSetReg for PIE GOT indirection on this platform", .{});
-                }
-
                 // MOV reg, [reg]
-                encoder.rex(.{
-                    .w = abi_size == 8,
-                    .r = reg.isExtended(),
-                    .b = reg.isExtended(),
+                _ = try self.addInst(.{
+                    .tag = .mov,
+                    .ops = (Mir.Ops{
+                        .reg1 = reg,
+                        .reg2 = reg,
+                        .flags = 0b01,
+                    }).encode(),
+                    .data = .{ .imm = 0 },
                 });
-                encoder.opcode_1byte(0x8B);
-                encoder.modRm_indirectDisp0(reg.low_id(), reg.low_id());
             } else if (x <= math.maxInt(i32)) {
-                // Moving from memory to a register is a variant of `8B /r`.
-                // Since we're using 64-bit moves, we require a REX.
-                // This variant also requires a SIB, as it would otherwise be RIP-relative.
-                // We want mode zero with the lower three bits set to four to indicate an SIB with no other displacement.
-                // The SIB must be 0x25, to indicate a disp32 with no scaled index.
-                // 0b00RRR100, where RRR is the lower three bits of the register ID.
-                // The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32.
-                const abi_size = ty.abiSize(self.target.*);
-                const encoder = try Encoder.init(self.code, 8);
-                encoder.rex(.{
-                    .w = abi_size == 8,
-                    .r = reg.isExtended(),
+                // mov reg, [ds:imm32]
+                _ = try self.addInst(.{
+                    .tag = .mov,
+                    .ops = (Mir.Ops{
+                        .reg1 = reg,
+                        .flags = 0b01,
+                    }).encode(),
+                    .data = .{ .imm = @intCast(i32, x) },
                 });
-                encoder.opcode_1byte(0x8B);
-                // effective address = [SIB]
-                encoder.modRm_SIBDisp0(reg.low_id());
-                // SIB = disp32
-                encoder.sib_disp32();
-                encoder.disp32(@intCast(i32, x));
             } else {
-                // If this is RAX, we can use a direct load; otherwise, we need to load the address, then indirectly load
-                // the value.
+                // If this is RAX, we can use a direct load.
+                // Otherwise, we need to load the address, then indirectly load the value.
                 if (reg.id() == 0) {
-                    // REX.W 0xA1 moffs64*
-                    // moffs64* is a 64-bit offset "relative to segment base", which really just means the
-                    // absolute address for all practical purposes.
-
-                    const encoder = try Encoder.init(self.code, 10);
-                    encoder.rex(.{
-                        .w = true,
+                    // movabs rax, ds:moffs64
+                    const payload = try self.addExtra(Mir.Imm64.encode(x));
+                    _ = try self.addInst(.{
+                        .tag = .movabs,
+                        .ops = (Mir.Ops{
+                            .reg1 = .rax,
+                            .flags = 0b01, // imm64 will become moffs64
+                        }).encode(),
+                        .data = .{ .payload = payload },
                     });
-                    encoder.opcode_1byte(0xA1);
-                    encoder.writeIntLittle(u64, x);
                 } else {
                     // This requires two instructions; a move imm as used above, followed by an indirect load using the register
                     // as the address and the register as the destination.
@@ -3181,16 +2941,16 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
                     // Currently, we're only allowing 64-bit registers, so we need the `REX.W 8B /r` variant.
                     // TODO: determine whether to allow other sized registers, and if so, handle them properly.
 
-                    // mov reg, [reg]
-                    const abi_size = ty.abiSize(self.target.*);
-                    const encoder = try Encoder.init(self.code, 3);
-                    encoder.rex(.{
-                        .w = abi_size == 8,
-                        .r = reg.isExtended(),
-                        .b = reg.isExtended(),
+                    // mov reg, [reg + 0x0]
+                    _ = try self.addInst(.{
+                        .tag = .mov,
+                        .ops = (Mir.Ops{
+                            .reg1 = reg,
+                            .reg2 = reg,
+                            .flags = 0b01,
+                        }).encode(),
+                        .data = .{ .imm = 0 },
                     });
-                    encoder.opcode_1byte(0x8B);
-                    encoder.modRm_indirectDisp0(reg.low_id(), reg.low_id());
                 }
             }
         },
@@ -3201,21 +2961,15 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
                 return self.fail("stack offset too large", .{});
             }
             const ioff = -@intCast(i32, off);
-            const encoder = try Encoder.init(self.code, 3);
-            encoder.rex(.{
-                .w = abi_size == 8,
-                .r = reg.isExtended(),
+            _ = try self.addInst(.{
+                .tag = .mov,
+                .ops = (Mir.Ops{
+                    .reg1 = reg,
+                    .reg2 = .ebp,
+                    .flags = 0b01,
+                }).encode(),
+                .data = .{ .imm = ioff },
             });
-            encoder.opcode_1byte(0x8B);
-            if (std.math.minInt(i8) <= ioff and ioff <= std.math.maxInt(i8)) {
-                // Example: 48 8b 4d 7f           mov    rcx,QWORD PTR [rbp+0x7f]
-                encoder.modRm_indirectDisp8(reg.low_id(), Register.ebp.low_id());
-                encoder.disp8(@intCast(i8, ioff));
-            } else {
-                // Example: 48 8b 8d 80 00 00 00  mov    rcx,QWORD PTR [rbp+0x80]
-                encoder.modRm_indirectDisp32(reg.low_id(), Register.ebp.low_id());
-                encoder.disp32(ioff);
-            }
         },
     }
 }
src/arch/x86_64/Emit.zig
@@ -0,0 +1,1161 @@
+//! This file contains the functionality for lowering x86_64 MIR into
+//! machine code.
+
+const Emit = @This();
+
+const std = @import("std");
+const assert = std.debug.assert;
+const bits = @import("bits.zig");
+const leb128 = std.leb;
+const link = @import("../../link.zig");
+const log = std.log.scoped(.codegen);
+const math = std.math;
+const mem = std.mem;
+
+const Air = @import("../../Air.zig");
+const DebugInfoOutput = @import("../../codegen.zig").DebugInfoOutput;
+const DW = std.dwarf;
+const Encoder = bits.Encoder;
+const ErrorMsg = Module.ErrorMsg;
+const MCValue = @import("CodeGen.zig").MCValue;
+const Mir = @import("Mir.zig");
+const Module = @import("../../Module.zig");
+const Instruction = bits.Instruction;
+const Register = bits.Register;
+const Type = @import("../../type.zig").Type;
+
+mir: Mir,
+bin_file: *link.File,
+debug_output: DebugInfoOutput,
+target: *const std.Target,
+err_msg: ?*ErrorMsg = null,
+src_loc: Module.SrcLoc,
+code: *std.ArrayList(u8),
+
+prev_di_line: u32,
+prev_di_column: u32,
+/// Relative to the beginning of `code`.
+prev_di_pc: usize,
+
+code_offset_mapping: std.AutoHashMapUnmanaged(Mir.Inst.Index, usize) = .{},
+relocs: std.ArrayListUnmanaged(Reloc) = .{},
+
+const InnerError = error{
+    OutOfMemory,
+    EmitFail,
+};
+
+const Reloc = struct {
+    /// Offset of the instruction.
+    source: u64,
+    /// Target of the relocation.
+    target: Mir.Inst.Index,
+    /// Offset of the relocation within the instruction.
+    offset: u64,
+    /// Length of the instruction.
+    length: u5,
+};
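+
+// A Reloc is resolved in fixupRelocs below: the rel32 displacement is
+// computed as `target_code_offset - (source + length)`, i.e. relative to
+// the first byte after the branch instruction, and written at byte
+// `offset` in `code`.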
+
+pub fn emitMir(emit: *Emit) InnerError!void {
+    const mir_tags = emit.mir.instructions.items(.tag);
+
+    for (mir_tags) |tag, index| {
+        const inst = @intCast(u32, index);
+        try emit.code_offset_mapping.putNoClobber(emit.bin_file.allocator, inst, emit.code.items.len);
+        switch (tag) {
+            .adc => try emit.mirArith(.adc, inst),
+            .add => try emit.mirArith(.add, inst),
+            .sub => try emit.mirArith(.sub, inst),
+            .xor => try emit.mirArith(.xor, inst),
+            .@"and" => try emit.mirArith(.@"and", inst),
+            .@"or" => try emit.mirArith(.@"or", inst),
+            .sbb => try emit.mirArith(.sbb, inst),
+            .cmp => try emit.mirArith(.cmp, inst),
+
+            .adc_scale_src => try emit.mirArithScaleSrc(.adc, inst),
+            .add_scale_src => try emit.mirArithScaleSrc(.add, inst),
+            .sub_scale_src => try emit.mirArithScaleSrc(.sub, inst),
+            .xor_scale_src => try emit.mirArithScaleSrc(.xor, inst),
+            .and_scale_src => try emit.mirArithScaleSrc(.@"and", inst),
+            .or_scale_src => try emit.mirArithScaleSrc(.@"or", inst),
+            .sbb_scale_src => try emit.mirArithScaleSrc(.sbb, inst),
+            .cmp_scale_src => try emit.mirArithScaleSrc(.cmp, inst),
+
+            .adc_scale_dst => try emit.mirArithScaleDst(.adc, inst),
+            .add_scale_dst => try emit.mirArithScaleDst(.add, inst),
+            .sub_scale_dst => try emit.mirArithScaleDst(.sub, inst),
+            .xor_scale_dst => try emit.mirArithScaleDst(.xor, inst),
+            .and_scale_dst => try emit.mirArithScaleDst(.@"and", inst),
+            .or_scale_dst => try emit.mirArithScaleDst(.@"or", inst),
+            .sbb_scale_dst => try emit.mirArithScaleDst(.sbb, inst),
+            .cmp_scale_dst => try emit.mirArithScaleDst(.cmp, inst),
+
+            .adc_scale_imm => try emit.mirArithScaleImm(.adc, inst),
+            .add_scale_imm => try emit.mirArithScaleImm(.add, inst),
+            .sub_scale_imm => try emit.mirArithScaleImm(.sub, inst),
+            .xor_scale_imm => try emit.mirArithScaleImm(.xor, inst),
+            .and_scale_imm => try emit.mirArithScaleImm(.@"and", inst),
+            .or_scale_imm => try emit.mirArithScaleImm(.@"or", inst),
+            .sbb_scale_imm => try emit.mirArithScaleImm(.sbb, inst),
+            .cmp_scale_imm => try emit.mirArithScaleImm(.cmp, inst),
+
+            // Even though MOV is technically not an arithmetic op,
+            // its structure can be represented using the same set of
+            // opcode primitives.
+            .mov => try emit.mirArith(.mov, inst),
+            .mov_scale_src => try emit.mirArithScaleSrc(.mov, inst),
+            .mov_scale_dst => try emit.mirArithScaleDst(.mov, inst),
+            .mov_scale_imm => try emit.mirArithScaleImm(.mov, inst),
+            .movabs => try emit.mirMovabs(inst),
+
+            .lea => try emit.mirLea(inst),
+            .lea_rip => try emit.mirLeaRip(inst),
+
+            .imul_complex => try emit.mirIMulComplex(inst),
+
+            .push => try emit.mirPushPop(.push, inst),
+            .pop => try emit.mirPushPop(.pop, inst),
+
+            .jmp => try emit.mirJmpCall(.jmp, inst),
+            .call => try emit.mirJmpCall(.call, inst),
+
+            .cond_jmp_greater_less => try emit.mirCondJmp(.cond_jmp_greater_less, inst),
+            .cond_jmp_above_below => try emit.mirCondJmp(.cond_jmp_above_below, inst),
+            .cond_jmp_eq_ne => try emit.mirCondJmp(.cond_jmp_eq_ne, inst),
+
+            .cond_set_byte_greater_less => try emit.mirCondSetByte(.cond_set_byte_greater_less, inst),
+            .cond_set_byte_above_below => try emit.mirCondSetByte(.cond_set_byte_above_below, inst),
+            .cond_set_byte_eq_ne => try emit.mirCondSetByte(.cond_set_byte_eq_ne, inst),
+
+            .ret => try emit.mirRet(inst),
+
+            .syscall => try emit.mirSyscall(),
+
+            .@"test" => try emit.mirTest(inst),
+
+            .brk => try emit.mirBrk(),
+
+            .call_extern => try emit.mirCallExtern(inst),
+
+            .dbg_line => try emit.mirDbgLine(inst),
+            .dbg_prologue_end => try emit.mirDbgPrologueEnd(inst),
+            .dbg_epilogue_begin => try emit.mirDbgEpilogueBegin(inst),
+            .arg_dbg_info => try emit.mirArgDbgInfo(inst),
+
+            else => {
+                return emit.fail("Implement MIR->Isel lowering for x86_64 for pseudo-inst: {s}", .{tag});
+            },
+        }
+    }
+
+    try emit.fixupRelocs();
+}
+
+pub fn deinit(emit: *Emit) void {
+    emit.relocs.deinit(emit.bin_file.allocator);
+    emit.code_offset_mapping.deinit(emit.bin_file.allocator);
+    emit.* = undefined;
+}
+
+fn fail(emit: *Emit, comptime format: []const u8, args: anytype) InnerError {
+    @setCold(true);
+    assert(emit.err_msg == null);
+    emit.err_msg = try ErrorMsg.create(emit.bin_file.allocator, emit.src_loc, format, args);
+    return error.EmitFail;
+}
+
+fn fixupRelocs(emit: *Emit) InnerError!void {
+    // TODO this function currently assumes all relocs via JMP/CALL instructions are 32bit in size.
+    // This should be reversed like it is done in aarch64 MIR emit code: start with the smallest
+    // possible resolution, i.e., 8bit, and iteratively converge on the minimum required resolution
+    // until the entire decl is correctly emitted with all JMP/CALL instructions within range.
+    for (emit.relocs.items) |reloc| {
+        const target = emit.code_offset_mapping.get(reloc.target) orelse
+            return emit.fail("JMP/CALL relocation target not found!", .{});
+        const disp = @intCast(i32, @intCast(i64, target) - @intCast(i64, reloc.source + reloc.length));
+        mem.writeIntLittle(i32, emit.code.items[reloc.offset..][0..4], disp);
+    }
+}
+
+fn mirBrk(emit: *Emit) InnerError!void {
+    const encoder = try Encoder.init(emit.code, 1);
+    encoder.opcode_1byte(0xcc);
+}
+
+fn mirSyscall(emit: *Emit) InnerError!void {
+    const encoder = try Encoder.init(emit.code, 2);
+    encoder.opcode_2byte(0x0f, 0x05);
+}
+
+fn mirPushPop(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void {
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    switch (ops.flags) {
+        0b00 => {
+            // PUSH/POP reg
+            const opc: u8 = switch (tag) {
+                .push => 0x50,
+                .pop => 0x58,
+                else => unreachable,
+            };
+            const encoder = try Encoder.init(emit.code, 2);
+            // Extended registers (r8..r15) need a REX.B prefix; Encoder.rex
+            // emits no byte when no flags are set.
+            encoder.rex(.{ .b = ops.reg1.isExtended() });
+            encoder.opcode_withReg(opc, ops.reg1.lowId());
+        },
+        0b01 => {
+            // PUSH/POP r/m64
+            const imm = emit.mir.instructions.items(.data)[inst].imm;
+            const opc: u8 = switch (tag) {
+                .push => 0xff,
+                .pop => 0x8f,
+                else => unreachable,
+            };
+            const modrm_ext: u3 = switch (tag) {
+                .push => 0x6,
+                .pop => 0x0,
+                else => unreachable,
+            };
+            const encoder = try Encoder.init(emit.code, 6);
+            encoder.opcode_1byte(opc);
+            if (math.cast(i8, imm)) |imm_i8| {
+                encoder.modRm_indirectDisp8(modrm_ext, ops.reg1.lowId());
+                encoder.disp8(imm_i8);
+            } else |_| {
+                encoder.modRm_indirectDisp32(modrm_ext, ops.reg1.lowId());
+                encoder.disp32(imm);
+            }
+        },
+        0b10 => {
+            // PUSH imm32
+            assert(tag == .push);
+            const imm = emit.mir.instructions.items(.data)[inst].imm;
+            // PUSH imm8 is 0x6a; anything larger uses PUSH imm32 (0x68).
+            const opc: u8 = if (imm <= math.maxInt(i8)) 0x6a else 0x68;
+            const encoder = try Encoder.init(emit.code, 5);
+            encoder.opcode_1byte(opc);
+            if (imm <= math.maxInt(i8)) {
+                encoder.imm8(@intCast(i8, imm));
+            } else {
+                encoder.imm32(imm);
+            }
+        },
+        0b11 => unreachable,
+    }
+}
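+
+// E.g. with flags 0b00, `push rbp` is the single byte 0x55 (0x50 + lowId);
+// `push r12` additionally needs REX.B: 41 54.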
+
+fn mirJmpCall(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void {
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    const flag = @truncate(u1, ops.flags);
+    if (flag == 0) {
+        const target = emit.mir.instructions.items(.data)[inst].inst;
+        const opc: u8 = switch (tag) {
+            .jmp => 0xe9,
+            .call => 0xe8,
+            else => unreachable,
+        };
+        const source = emit.code.items.len;
+        const encoder = try Encoder.init(emit.code, 5);
+        encoder.opcode_1byte(opc);
+        try emit.relocs.append(emit.bin_file.allocator, .{
+            .source = source,
+            .target = target,
+            .offset = emit.code.items.len,
+            .length = 5,
+        });
+        encoder.imm32(0x0);
+        return;
+    }
+    const modrm_ext: u3 = switch (tag) {
+        .jmp => 0x4,
+        .call => 0x2,
+        else => unreachable,
+    };
+    if (ops.reg1 == .none) {
+        // JMP/CALL [imm]
+        const imm = emit.mir.instructions.items(.data)[inst].imm;
+        const encoder = try Encoder.init(emit.code, 7);
+        encoder.opcode_1byte(0xff);
+        encoder.modRm_SIBDisp0(modrm_ext);
+        encoder.sib_disp32();
+        encoder.imm32(imm);
+        return;
+    }
+    // JMP/CALL reg
+    const encoder = try Encoder.init(emit.code, 2);
+    encoder.opcode_1byte(0xff);
+    encoder.modRm_direct(modrm_ext, ops.reg1.lowId());
+}
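+
+// E.g. a `call` with flags 0b01 and reg1 = .rax lowers to `ff d0`
+// (opcode 0xff with modrm ext /2 selecting CALL r64).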
+
+const CondType = enum {
+    /// greater than or equal
+    gte,
+
+    /// greater than
+    gt,
+
+    /// less than
+    lt,
+
+    /// less than or equal
+    lte,
+
+    /// above or equal
+    ae,
+
+    /// above
+    a,
+
+    /// below
+    b,
+
+    /// below or equal
+    be,
+
+    /// not equal
+    ne,
+
+    /// equal
+    eq,
+
+    fn fromTagAndFlags(tag: Mir.Inst.Tag, flags: u2) CondType {
+        return switch (tag) {
+            .cond_jmp_greater_less,
+            .cond_set_byte_greater_less,
+            => switch (flags) {
+                0b00 => CondType.gte,
+                0b01 => CondType.gt,
+                0b10 => CondType.lt,
+                0b11 => CondType.lte,
+            },
+            .cond_jmp_above_below,
+            .cond_set_byte_above_below,
+            => switch (flags) {
+                0b00 => CondType.ae,
+                0b01 => CondType.a,
+                0b10 => CondType.b,
+                0b11 => CondType.be,
+            },
+            .cond_jmp_eq_ne,
+            .cond_set_byte_eq_ne,
+            => switch (@truncate(u1, flags)) {
+                0b0 => CondType.ne,
+                0b1 => CondType.eq,
+            },
+            else => unreachable,
+        };
+    }
+};
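+
+// How this pairs with CodeGen: for a signed `a < b` condbr, CodeGen emits
+// `.cond_jmp_greater_less` with flags 0b00 (the inverted condition, since
+// the jump goes to the false branch); fromTagAndFlags gives `.gte`, and
+// getCondOpCode below picks 0x8d, i.e. `jge rel32`.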
+
+inline fn getCondOpCode(tag: Mir.Inst.Tag, cond: CondType) u8 {
+    switch (cond) {
+        .gte => return switch (tag) {
+            .cond_jmp_greater_less => 0x8d,
+            .cond_set_byte_greater_less => 0x9d,
+            else => unreachable,
+        },
+        .gt => return switch (tag) {
+            .cond_jmp_greater_less => 0x8f,
+            .cond_set_byte_greater_less => 0x9f,
+            else => unreachable,
+        },
+        .lt => return switch (tag) {
+            .cond_jmp_greater_less => 0x8c,
+            .cond_set_byte_greater_less => 0x9c,
+            else => unreachable,
+        },
+        .lte => return switch (tag) {
+            .cond_jmp_greater_less => 0x8e,
+            .cond_set_byte_greater_less => 0x9e,
+            else => unreachable,
+        },
+        .ae => return switch (tag) {
+            .cond_jmp_above_below => 0x83,
+            .cond_set_byte_above_below => 0x93,
+            else => unreachable,
+        },
+        .a => return switch (tag) {
+            .cond_jmp_above_below => 0x87,
+            .cond_set_byte_above_below => 0x97,
+            else => unreachable,
+        },
+        .b => return switch (tag) {
+            .cond_jmp_above_below => 0x82,
+            .cond_set_byte_above_below => 0x92,
+            else => unreachable,
+        },
+        .be => return switch (tag) {
+            .cond_jmp_above_below => 0x86,
+            .cond_set_byte_above_below => 0x96,
+            else => unreachable,
+        },
+        .eq => return switch (tag) {
+            .cond_jmp_eq_ne => 0x84,
+            .cond_set_byte_eq_ne => 0x94,
+            else => unreachable,
+        },
+        .ne => return switch (tag) {
+            .cond_jmp_eq_ne => 0x85,
+            .cond_set_byte_eq_ne => 0x95,
+            else => unreachable,
+        },
+    }
+}
+
+fn mirCondJmp(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void {
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    const target = emit.mir.instructions.items(.data)[inst].inst;
+    const cond = CondType.fromTagAndFlags(tag, ops.flags);
+    const opc = getCondOpCode(tag, cond);
+    const source = emit.code.items.len;
+    const encoder = try Encoder.init(emit.code, 6);
+    encoder.opcode_2byte(0x0f, opc);
+    try emit.relocs.append(emit.bin_file.allocator, .{
+        .source = source,
+        .target = target,
+        .offset = emit.code.items.len,
+        .length = 6,
+    });
+    encoder.imm32(0);
+}
+
+fn mirCondSetByte(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void {
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    const cond = CondType.fromTagAndFlags(tag, ops.flags);
+    const opc = getCondOpCode(tag, cond);
+    const encoder = try Encoder.init(emit.code, 4);
+    encoder.rex(.{
+        .w = true,
+        .b = ops.reg1.isExtended(),
+    });
+    encoder.opcode_2byte(0x0f, opc);
+    encoder.modRm_direct(0x0, ops.reg1.lowId());
+}
+
+fn mirTest(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .@"test");
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    switch (ops.flags) {
+        0b00 => blk: {
+            if (ops.reg2 == .none) {
+                // TEST r/m64, imm32
+                const imm = emit.mir.instructions.items(.data)[inst].imm;
+                if (ops.reg1.to64() == .rax) {
+                    // TODO reduce the size of the instruction if the immediate
+                    // is smaller than 32 bits
+                    const encoder = try Encoder.init(emit.code, 6);
+                    encoder.rex(.{
+                        .w = true,
+                    });
+                    encoder.opcode_1byte(0xa9);
+                    encoder.imm32(imm);
+                    break :blk;
+                }
+                const opc: u8 = if (ops.reg1.size() == 8) 0xf6 else 0xf7;
+                const encoder = try Encoder.init(emit.code, 7);
+                encoder.rex(.{
+                    .w = true,
+                    .b = ops.reg1.isExtended(),
+                });
+                encoder.opcode_1byte(opc);
+                encoder.modRm_direct(0, ops.reg1.lowId());
+                if (ops.reg1.size() == 8) {
+                    encoder.imm8(@intCast(i8, imm));
+                } else {
+                    // 0xf7 takes a full imm32.
+                    encoder.imm32(imm);
+                }
+                break :blk;
+            }
+            // TEST r/m64, r64
+            return emit.fail("TODO TEST r/m64, r64", .{});
+        },
+        else => return emit.fail("TODO more TEST alternatives", .{}),
+    }
+}
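+
+// E.g. `test rax, 1` takes the fast path above and encodes as
+// REX.W + 0xa9 + imm32: 48 a9 01 00 00 00.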
+
+fn mirRet(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .ret);
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    const encoder = try Encoder.init(emit.code, 3);
+    switch (ops.flags) {
+        0b00 => {
+            // RETF imm16
+            const imm = emit.mir.instructions.items(.data)[inst].imm;
+            encoder.opcode_1byte(0xca);
+            encoder.imm16(@intCast(i16, imm));
+        },
+        0b01 => encoder.opcode_1byte(0xcb), // RETF
+        0b10 => {
+            // RET imm16
+            const imm = emit.mir.instructions.items(.data)[inst].imm;
+            encoder.opcode_1byte(0xc2);
+            encoder.imm16(@intCast(i16, imm));
+        },
+        0b11 => encoder.opcode_1byte(0xc3), // RET
+    }
+}
+
+const EncType = enum {
+    /// OP r/m64, imm32
+    mi,
+
+    /// OP r/m64, r64
+    mr,
+
+    /// OP r64, r/m64
+    rm,
+};
+
+const OpCode = struct {
+    opc: u8,
+    /// Only used if `EncType == .mi`.
+    modrm_ext: u3,
+};
+
+inline fn getArithOpCode(tag: Mir.Inst.Tag, enc: EncType) OpCode {
+    switch (enc) {
+        .mi => return switch (tag) {
+            .adc => .{ .opc = 0x81, .modrm_ext = 0x2 },
+            .add => .{ .opc = 0x81, .modrm_ext = 0x0 },
+            .sub => .{ .opc = 0x81, .modrm_ext = 0x5 },
+            .xor => .{ .opc = 0x81, .modrm_ext = 0x6 },
+            .@"and" => .{ .opc = 0x81, .modrm_ext = 0x4 },
+            .@"or" => .{ .opc = 0x81, .modrm_ext = 0x1 },
+            .sbb => .{ .opc = 0x81, .modrm_ext = 0x3 },
+            .cmp => .{ .opc = 0x81, .modrm_ext = 0x7 },
+            .mov => .{ .opc = 0xc7, .modrm_ext = 0x0 },
+            else => unreachable,
+        },
+        .mr => {
+            const opc: u8 = switch (tag) {
+                .adc => 0x11,
+                .add => 0x01,
+                .sub => 0x29,
+                .xor => 0x31,
+                .@"and" => 0x21,
+                .@"or" => 0x09,
+                .sbb => 0x19,
+                .cmp => 0x39,
+                .mov => 0x89,
+                else => unreachable,
+            };
+            return .{ .opc = opc, .modrm_ext = undefined };
+        },
+        .rm => {
+            const opc: u8 = switch (tag) {
+                .adc => 0x13,
+                .add => 0x03,
+                .sub => 0x2b,
+                .xor => 0x33,
+                .@"and" => 0x23,
+                .@"or" => 0x0b,
+                .sbb => 0x1b,
+                .cmp => 0x3b,
+                .mov => 0x8b,
+                else => unreachable,
+            };
+            return .{ .opc = opc, .modrm_ext = undefined };
+        },
+    }
+}
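+
+// E.g. getArithOpCode(.add, .mi) yields .{ .opc = 0x81, .modrm_ext = 0x0 }
+// (ADD r/m64, imm32), while getArithOpCode(.add, .rm) yields opc 0x03
+// (ADD r64, r/m64).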
+
+fn mirArith(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void {
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    switch (ops.flags) {
+        0b00 => blk: {
+            if (ops.reg2 == .none) {
+                // OP reg1, imm32
+                // OP r/m64, imm32
+                const imm = emit.mir.instructions.items(.data)[inst].imm;
+                const opcode = getArithOpCode(tag, .mi);
+                const encoder = try Encoder.init(emit.code, 7);
+                encoder.rex(.{
+                    .w = ops.reg1.size() == 64,
+                    .b = ops.reg1.isExtended(),
+                });
+                if (tag != .mov and imm <= math.maxInt(i8)) {
+                    encoder.opcode_1byte(opcode.opc + 2);
+                    encoder.modRm_direct(opcode.modrm_ext, ops.reg1.lowId());
+                    encoder.imm8(@intCast(i8, imm));
+                } else {
+                    encoder.opcode_1byte(opcode.opc);
+                    encoder.modRm_direct(opcode.modrm_ext, ops.reg1.lowId());
+                    encoder.imm32(imm);
+                }
+                break :blk;
+            }
+            // OP reg1, reg2
+            // OP r/m64, r64
+            const opcode = getArithOpCode(tag, .mr);
+            const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc;
+            const encoder = try Encoder.init(emit.code, 3);
+            encoder.rex(.{
+                .w = ops.reg1.size() == 64 and ops.reg2.size() == 64,
+                .r = ops.reg1.isExtended(),
+                .b = ops.reg2.isExtended(),
+            });
+            encoder.opcode_1byte(opc);
+            encoder.modRm_direct(ops.reg1.lowId(), ops.reg2.lowId());
+        },
+        0b01 => blk: {
+            const imm = emit.mir.instructions.items(.data)[inst].imm;
+            const opcode = getArithOpCode(tag, .rm);
+            const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc;
+            if (ops.reg2 == .none) {
+                // OP reg1, [imm32]
+                // OP r64, r/m64
+                const encoder = try Encoder.init(emit.code, 8);
+                encoder.rex(.{
+                    .w = ops.reg1.size() == 64,
+                    .b = ops.reg1.isExtended(),
+                });
+                encoder.opcode_1byte(opc);
+                encoder.modRm_SIBDisp0(ops.reg1.lowId());
+                encoder.sib_disp32();
+                encoder.disp32(imm);
+                break :blk;
+            }
+            // OP reg1, [reg2 + imm32]
+            // OP r64, r/m64
+            const encoder = try Encoder.init(emit.code, 7);
+            encoder.rex(.{
+                .w = ops.reg1.size() == 64,
+                .r = ops.reg1.isExtended(),
+                .b = ops.reg2.isExtended(),
+            });
+            encoder.opcode_1byte(opc);
+            if (imm <= math.maxInt(i8)) {
+                encoder.modRm_indirectDisp8(ops.reg1.lowId(), ops.reg2.lowId());
+                encoder.disp8(@intCast(i8, imm));
+            } else {
+                encoder.modRm_indirectDisp32(ops.reg1.lowId(), ops.reg2.lowId());
+                encoder.disp32(imm);
+            }
+        },
+        0b10 => blk: {
+            if (ops.reg2 == .none) {
+                // OP [reg1 + 0], imm32
+                // OP r/m64, imm32
+                const imm = emit.mir.instructions.items(.data)[inst].imm;
+                const opcode = getArithOpCode(tag, .mi);
+                const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc;
+                const encoder = try Encoder.init(emit.code, 7);
+                encoder.rex(.{
+                    .w = ops.reg1.size() == 64,
+                    .b = ops.reg1.isExtended(),
+                });
+                encoder.opcode_1byte(opc);
+                encoder.modRm_indirectDisp0(opcode.modrm_ext, ops.reg1.lowId());
+                if (imm <= math.maxInt(i8)) {
+                    encoder.imm8(@intCast(i8, imm));
+                } else if (imm <= math.maxInt(i16)) {
+                    encoder.imm16(@intCast(i16, imm));
+                } else {
+                    encoder.imm32(imm);
+                }
+                break :blk;
+            }
+            // OP [reg1 + imm32], reg2
+            // OP r/m64, r64
+            const imm = emit.mir.instructions.items(.data)[inst].imm;
+            const opcode = getArithOpCode(tag, .mr);
+            const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc;
+            const encoder = try Encoder.init(emit.code, 7);
+            encoder.rex(.{
+                .w = ops.reg2.size() == 64,
+                .r = ops.reg1.isExtended(),
+                .b = ops.reg2.isExtended(),
+            });
+            encoder.opcode_1byte(opc);
+            if (imm <= math.maxInt(i8)) {
+                encoder.modRm_indirectDisp8(ops.reg1.lowId(), ops.reg2.lowId());
+                encoder.disp8(@intCast(i8, imm));
+            } else {
+                encoder.modRm_indirectDisp32(ops.reg1.lowId(), ops.reg2.lowId());
+                encoder.disp32(imm);
+            }
+        },
+        0b11 => blk: {
+            if (ops.reg2 == .none) {
+                // OP [reg1 + imm32], imm32
+                // OP r/m64, imm32
+                const payload = emit.mir.instructions.items(.data)[inst].payload;
+                const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data;
+                const opcode = getArithOpCode(tag, .mi);
+                const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc;
+                const encoder = try Encoder.init(emit.code, 11);
+                encoder.rex(.{
+                    .w = false,
+                    .b = ops.reg1.isExtended(),
+                });
+                encoder.opcode_1byte(opc);
+                if (imm_pair.dest_off >= math.minInt(i8) and imm_pair.dest_off <= math.maxInt(i8)) {
+                    encoder.modRm_indirectDisp8(opcode.modrm_ext, ops.reg1.lowId());
+                    encoder.disp8(@intCast(i8, imm_pair.dest_off));
+                } else {
+                    encoder.modRm_indirectDisp32(opcode.modrm_ext, ops.reg1.lowId());
+                    encoder.disp32(imm_pair.dest_off);
+                }
+                encoder.imm32(imm_pair.operand);
+                break :blk;
+            }
+            // TODO clearly mov doesn't belong here; for the other, arithmetic ops,
+            // this is the same as 0b00.
+            const opcode = getArithOpCode(tag, if (tag == .mov) .rm else .mr);
+            const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc;
+            const encoder = try Encoder.init(emit.code, 3);
+            encoder.rex(.{
+                .w = ops.reg1.size() == 64 and ops.reg2.size() == 64,
+                .r = ops.reg1.isExtended(),
+                .b = ops.reg2.isExtended(),
+            });
+            encoder.opcode_1byte(opc);
+            encoder.modRm_direct(ops.reg1.lowId(), ops.reg2.lowId());
+        },
+    }
+}
+
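+// The *_scale_* lowerings below all emit a SIB byte, which packs
+// (scale << 6) | (index << 3) | base, where scale is the log2 of the
+// multiplier. As an illustrative example (not emitted verbatim by this
+// code path), `add rax, [rbx + 4*rcx + 16]` encodes as:
+//   0x48  REX.W
+//   0x03  add r64, r/m64
+//   0x44  modrm: mod=01 (disp8), reg=rax, rm=0b100 (SIB follows)
+//   0x8b  sib: scale=0b10 (4), index=rcx, base=rbx
+//   0x10  disp8 = 16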
+fn mirArithScaleSrc(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void {
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    const scale = ops.flags;
+    // OP reg1, [reg2 + scale*rcx + imm32]
+    const opcode = getArithOpCode(tag, .rm);
+    const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc;
+    const imm = emit.mir.instructions.items(.data)[inst].imm;
+    const encoder = try Encoder.init(emit.code, 8);
+    encoder.rex(.{
+        .w = ops.reg1.size() == 64,
+        .r = ops.reg1.isExtended(),
+        .b = ops.reg2.isExtended(),
+    });
+    encoder.opcode_1byte(opc);
+    if (imm >= math.minInt(i8) and imm <= math.maxInt(i8)) {
+        encoder.modRm_SIBDisp8(ops.reg1.lowId());
+        encoder.sib_scaleIndexBaseDisp8(scale, Register.rcx.lowId(), ops.reg2.lowId());
+        encoder.disp8(@intCast(i8, imm));
+    } else {
+        encoder.modRm_SIBDisp32(ops.reg1.lowId());
+        encoder.sib_scaleIndexBaseDisp32(scale, Register.rcx.lowId(), ops.reg2.lowId());
+        encoder.disp32(imm);
+    }
+}
+
+fn mirArithScaleDst(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void {
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    const scale = ops.flags;
+    const imm = emit.mir.instructions.items(.data)[inst].imm;
+
+    if (ops.reg2 == .none) {
+        // OP [reg1 + scale*rax + 0], imm32
+        const opcode = getArithOpCode(tag, .mi);
+        const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc;
+        const encoder = try Encoder.init(emit.code, 8);
+        encoder.rex(.{
+            .w = ops.reg1.size() == 64,
+            .b = ops.reg1.isExtended(),
+        });
+        encoder.opcode_1byte(opc);
+        encoder.modRm_SIBDisp0(opcode.modrm_ext);
+        encoder.sib_scaleIndexBase(scale, Register.rax.lowId(), ops.reg1.lowId());
+        // The .mi form has a fixed immediate width: imm8 for 8-bit operands,
+        // otherwise imm32.
+        if (ops.reg1.size() == 8) {
+            encoder.imm8(@intCast(i8, imm));
+        } else {
+            encoder.imm32(imm);
+        }
+        return;
+    }
+
+    // OP [reg1 + scale*rax + imm32], reg2
+    const opcode = getArithOpCode(tag, .mr);
+    const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc;
+    const encoder = try Encoder.init(emit.code, 8);
+    encoder.rex(.{
+        .w = ops.reg2.size() == 64,
+        .r = ops.reg2.isExtended(),
+        .b = ops.reg1.isExtended(),
+    });
+    encoder.opcode_1byte(opc);
+    if (imm >= math.minInt(i8) and imm <= math.maxInt(i8)) {
+        encoder.modRm_SIBDisp8(ops.reg2.lowId());
+        encoder.sib_scaleIndexBaseDisp8(scale, Register.rax.lowId(), ops.reg1.lowId());
+        encoder.disp8(@intCast(i8, imm));
+    } else {
+        encoder.modRm_SIBDisp32(ops.reg2.lowId());
+        encoder.sib_scaleIndexBaseDisp32(scale, Register.rax.lowId(), ops.reg1.lowId());
+        encoder.disp32(imm);
+    }
+}
+
+fn mirArithScaleImm(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void {
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    const scale = ops.flags;
+    const payload = emit.mir.instructions.items(.data)[inst].payload;
+    const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data;
+    const opcode = getArithOpCode(tag, .mi);
+    const opc = if (ops.reg1.size() == 8) opcode.opc - 1 else opcode.opc;
+    const encoder = try Encoder.init(emit.code, 12);
+    encoder.rex(.{
+        .w = ops.reg1.size() == 64,
+        .b = ops.reg1.isExtended(),
+    });
+    encoder.opcode_1byte(opc);
+    if (imm_pair.dest_off >= math.minInt(i8) and imm_pair.dest_off <= math.maxInt(i8)) {
+        encoder.modRm_SIBDisp8(opcode.modrm_ext);
+        encoder.sib_scaleIndexBaseDisp8(scale, Register.rax.lowId(), ops.reg1.lowId());
+        encoder.disp8(@intCast(i8, imm_pair.dest_off));
+    } else {
+        encoder.modRm_SIBDisp32(opcode.modrm_ext);
+        encoder.sib_scaleIndexBaseDisp32(scale, Register.rax.lowId(), ops.reg1.lowId());
+        encoder.disp32(imm_pair.dest_off);
+    }
+    encoder.imm32(imm_pair.operand);
+}
+
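+// movabs is the only mov form that accepts a 64-bit immediate; e.g.
+// `movabs rax, 0x1122334455667788` encodes as REX.W (0x48), 0xb8+rd,
+// followed by the raw little-endian imm64:
+//   48 b8 88 77 66 55 44 33 22 11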
+fn mirMovabs(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .movabs);
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+
+    const encoder = try Encoder.init(emit.code, 10);
+    const is_64 = blk: {
+        if (ops.flags == 0b00) {
+            // movabs reg, imm
+            const opc: u8 = if (ops.reg1.size() == 8) 0xb0 else 0xb8;
+            if (ops.reg1.size() == 64) {
+                encoder.rex(.{
+                    .w = true,
+                    .b = ops.reg1.isExtended(),
+                });
+                encoder.opcode_withReg(opc, ops.reg1.lowId());
+                break :blk true;
+            }
+            encoder.rex(.{
+                .b = ops.reg1.isExtended(),
+            });
+            encoder.opcode_withReg(opc, ops.reg1.lowId());
+            break :blk false;
+        }
+        if (ops.reg1 == .none) {
+            // movabs moffs64, rax
+            const opc: u8 = if (ops.reg2.size() == 8) 0xa2 else 0xa3;
+            encoder.rex(.{
+                .w = ops.reg2.size() == 64,
+            });
+            encoder.opcode_1byte(opc);
+        } else {
+            // movabs rax, moffs64
+            const opc: u8 = if (ops.reg1.size() == 8) 0xa0 else 0xa1;
+            encoder.rex(.{
+                .w = ops.reg1.size() == 64,
+            });
+            encoder.opcode_1byte(opc);
+        }
+        // In 64-bit mode, the moffs operand is always an 8-byte offset,
+        // regardless of operand size.
+        break :blk true;
+    };
+
+    if (is_64) {
+        const payload = emit.mir.instructions.items(.data)[inst].payload;
+        const imm64 = emit.mir.extraData(Mir.Imm64, payload).data;
+        encoder.imm64(imm64.decode());
+    } else {
+        // movabs reg, imm: the immediate width must match the register width.
+        const imm = emit.mir.instructions.items(.data)[inst].imm;
+        switch (ops.reg1.size()) {
+            8 => encoder.imm8(@intCast(i8, imm)),
+            16 => encoder.imm16(@intCast(i16, imm)),
+            else => encoder.imm32(imm),
+        }
+    }
+}
+
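+// Illustrative encodings for the two imul forms handled below:
+//   imul rax, rcx     -> 48 0f af c1   (0b00: r64, r/m64)
+//   imul rdx, rcx, 8  -> 48 6b d1 08   (0b10: r64, r/m64, imm8)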
+fn mirIMulComplex(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .imul_complex);
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    switch (ops.flags) {
+        0b00 => {
+            const encoder = try Encoder.init(emit.code, 4);
+            encoder.rex(.{
+                .w = ops.reg1.size() == 64,
+                .r = ops.reg1.isExtended(),
+                .b = ops.reg2.isExtended(),
+            });
+            encoder.opcode_2byte(0x0f, 0xaf);
+            encoder.modRm_direct(ops.reg1.lowId(), ops.reg2.lowId());
+        },
+        0b10 => {
+            const imm = emit.mir.instructions.items(.data)[inst].imm;
+            const opc: u8 = if (imm >= math.minInt(i8) and imm <= math.maxInt(i8)) 0x6b else 0x69;
+            const encoder = try Encoder.init(emit.code, 7);
+            encoder.rex(.{
+                .w = ops.reg1.size() == 64,
+                .r = ops.reg1.isExtended(),
+                .b = ops.reg2.isExtended(),
+            });
+            encoder.opcode_1byte(opc);
+            encoder.modRm_direct(ops.reg1.lowId(), ops.reg2.lowId());
+            if (opc == 0x6b) {
+                encoder.imm8(@intCast(i8, imm));
+            } else {
+                // 0x69 takes a full imm32; a 16-bit immediate would also
+                // require the operand-size prefix.
+                encoder.imm32(imm);
+            }
+        },
+        else => return emit.fail("TODO implement imul", .{}),
+    }
+}
+
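+// lea only computes the effective address, so the displacement width is the
+// only thing that varies; e.g. `lea rax, [rbp - 8]` is 48 8d 45 f8
+// (REX.W, 0x8d, modrm mod=01 reg=rax rm=rbp, disp8 = -8).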
+fn mirLea(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .lea);
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    assert(ops.flags == 0b01);
+    const imm = emit.mir.instructions.items(.data)[inst].imm;
+
+    if (imm == 0) {
+        const encoder = try Encoder.init(emit.code, 3);
+        encoder.rex(.{
+            .w = ops.reg1.size() == 64,
+            .r = ops.reg1.isExtended(),
+            .b = ops.reg2.isExtended(),
+        });
+        encoder.opcode_1byte(0x8d);
+        encoder.modRm_indirectDisp0(ops.reg1.lowId(), ops.reg2.lowId());
+    } else if (imm >= math.minInt(i8) and imm <= math.maxInt(i8)) {
+        const encoder = try Encoder.init(emit.code, 4);
+        encoder.rex(.{
+            .w = ops.reg1.size() == 64,
+            .r = ops.reg1.isExtended(),
+            .b = ops.reg2.isExtended(),
+        });
+        encoder.opcode_1byte(0x8d);
+        encoder.modRm_indirectDisp8(ops.reg1.lowId(), ops.reg2.lowId());
+        encoder.disp8(@intCast(i8, imm));
+    } else {
+        const encoder = try Encoder.init(emit.code, 7);
+        encoder.rex(.{
+            .w = ops.reg1.size() == 64,
+            .r = ops.reg1.isExtended(),
+            .b = ops.reg2.isExtended(),
+        });
+        encoder.opcode_1byte(0x8d);
+        encoder.modRm_indirectDisp32(ops.reg1.lowId(), ops.reg2.lowId());
+        encoder.disp32(imm);
+    }
+}
+
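+// RIP-relative lea: modrm mod=00, rm=0b101 selects [rip + disp32], where the
+// disp32 is measured from the end of the instruction; e.g.
+// `lea rax, [rip + 0x10]` is 48 8d 05 10 00 00 00.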
+fn mirLeaRip(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .lea_rip);
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    const start_offset = emit.code.items.len;
+    const encoder = try Encoder.init(emit.code, 7);
+    encoder.rex(.{
+        .w = ops.reg1.size() == 64,
+        .r = ops.reg1.isExtended(),
+    });
+    encoder.opcode_1byte(0x8d);
+    encoder.modRm_RIPDisp32(ops.reg1.lowId());
+    const end_offset = emit.code.items.len;
+    if (@truncate(u1, ops.flags) == 0b0) {
+        const payload = emit.mir.instructions.items(.data)[inst].payload;
+        const imm = emit.mir.extraData(Mir.Imm64, payload).data.decode();
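+        // The disp32 is relative to the end of the instruction, so subtract
+        // the bytes emitted so far plus the 4 bytes of the disp32 itself.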
+        encoder.disp32(@intCast(i32, @intCast(i64, imm) - @intCast(i64, end_offset - start_offset + 4)));
+    } else {
+        const got_entry = emit.mir.instructions.items(.data)[inst].got_entry;
+        encoder.disp32(0);
+        if (emit.bin_file.cast(link.File.MachO)) |macho_file| {
+            // TODO I think the reloc might be in the wrong place.
+            const decl = macho_file.active_decl.?;
+            try decl.link.macho.relocs.append(emit.bin_file.allocator, .{
+                .offset = @intCast(u32, end_offset),
+                .target = .{ .local = got_entry },
+                .addend = 0,
+                .subtractor = null,
+                .pcrel = true,
+                .length = 2,
+                .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_GOT),
+            });
+        } else {
+            return emit.fail("TODO implement lea_rip for linking backends different than MachO", .{});
+        }
+    }
+}
+
+fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .call_extern);
+    const n_strx = emit.mir.instructions.items(.data)[inst].extern_fn;
+    const offset = blk: {
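+        // The reloc offset must point at the imm32 of the call, hence the +1
+        // to skip over the 0xe8 opcode byte.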
+        const offset = @intCast(u32, emit.code.items.len + 1);
+        // callq
+        const encoder = try Encoder.init(emit.code, 5);
+        encoder.opcode_1byte(0xe8);
+        encoder.imm32(0x0);
+        break :blk offset;
+    };
+    if (emit.bin_file.cast(link.File.MachO)) |macho_file| {
+        // Add relocation to the decl.
+        try macho_file.active_decl.?.link.macho.relocs.append(emit.bin_file.allocator, .{
+            .offset = offset,
+            .target = .{ .global = n_strx },
+            .addend = 0,
+            .subtractor = null,
+            .pcrel = true,
+            .length = 2,
+            .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_BRANCH),
+        });
+    } else {
+        return emit.fail("TODO implement call_extern for linking backends different than MachO", .{});
+    }
+}
+
+fn mirDbgLine(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .dbg_line);
+    const payload = emit.mir.instructions.items(.data)[inst].payload;
+    const dbg_line_column = emit.mir.extraData(Mir.DbgLineColumn, payload).data;
+    try emit.dbgAdvancePCAndLine(dbg_line_column.line, dbg_line_column.column);
+}
+
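+// For DWARF this emits the line-program sequence
+//   advance_pc <uleb delta_pc> [advance_line <sleb delta_line>] copy
+// which appends a row to the line table at the current pc/line.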
+fn dbgAdvancePCAndLine(emit: *Emit, line: u32, column: u32) InnerError!void {
+    const delta_line = @intCast(i32, line) - @intCast(i32, emit.prev_di_line);
+    const delta_pc: usize = emit.code.items.len - emit.prev_di_pc;
+    switch (emit.debug_output) {
+        .dwarf => |dbg_out| {
+            // TODO Look into using the DWARF special opcodes to compress this data.
+            // It lets you emit single-byte opcodes that add different numbers to
+            // both the PC and the line number at the same time.
+            try dbg_out.dbg_line.ensureUnusedCapacity(11);
+            dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.advance_pc);
+            leb128.writeULEB128(dbg_out.dbg_line.writer(), delta_pc) catch unreachable;
+            if (delta_line != 0) {
+                dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.advance_line);
+                leb128.writeILEB128(dbg_out.dbg_line.writer(), delta_line) catch unreachable;
+            }
+            dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.copy);
+            emit.prev_di_line = line;
+            emit.prev_di_column = column;
+            emit.prev_di_pc = emit.code.items.len;
+        },
+        .plan9 => |dbg_out| {
+            if (delta_pc == 0) return; // only emit when the pc has advanced
+            // we have already checked the target in the linker to make sure it is compatible
+            const quant = @import("../../link/Plan9/aout.zig").getPCQuant(emit.target.cpu.arch) catch unreachable;
+
+            // increasing the line number
+            try @import("../../link/Plan9.zig").changeLine(dbg_out.dbg_line, delta_line);
+            // increasing the pc
+            const d_pc_p9 = @intCast(i64, delta_pc) - quant;
+            if (d_pc_p9 > 0) {
+                // minus one because if it's the last one, we want to leave space to change the line, which is one quantum
+                try dbg_out.dbg_line.append(@intCast(u8, @divExact(d_pc_p9, quant) + 128) - quant);
+                if (dbg_out.pcop_change_index.*) |pci|
+                    dbg_out.dbg_line.items[pci] += 1;
+                dbg_out.pcop_change_index.* = @intCast(u32, dbg_out.dbg_line.items.len - 1);
+            } else if (d_pc_p9 == 0) {
+                // we don't need to do anything, because adding the quant does it for us
+            } else unreachable;
+            if (dbg_out.start_line.* == null)
+                dbg_out.start_line.* = emit.prev_di_line;
+            dbg_out.end_line.* = line;
+            // only do this if the pc changed
+            emit.prev_di_line = line;
+            emit.prev_di_column = column;
+            emit.prev_di_pc = emit.code.items.len;
+        },
+        .none => {},
+    }
+}
+
+fn mirDbgPrologueEnd(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .dbg_prologue_end);
+    switch (emit.debug_output) {
+        .dwarf => |dbg_out| {
+            try dbg_out.dbg_line.append(DW.LNS.set_prologue_end);
+            try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column);
+        },
+        .plan9 => {},
+        .none => {},
+    }
+}
+
+fn mirDbgEpilogueBegin(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .dbg_epilogue_begin);
+    switch (emit.debug_output) {
+        .dwarf => |dbg_out| {
+            try dbg_out.dbg_line.append(DW.LNS.set_epilogue_begin);
+            try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column);
+        },
+        .plan9 => {},
+        .none => {},
+    }
+}
+
+fn mirArgDbgInfo(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .arg_dbg_info);
+    const payload = emit.mir.instructions.items(.data)[inst].payload;
+    const arg_dbg_info = emit.mir.extraData(Mir.ArgDbgInfo, payload).data;
+    const mcv = emit.mir.function.args[arg_dbg_info.arg_index];
+    try emit.genArgDbgInfo(arg_dbg_info.air_inst, mcv);
+}
+
+fn genArgDbgInfo(emit: *Emit, inst: Air.Inst.Index, mcv: MCValue) !void {
+    const ty_str = emit.mir.function.air.instructions.items(.data)[inst].ty_str;
+    const zir = &emit.mir.function.mod_fn.owner_decl.getFileScope().zir;
+    const name = zir.nullTerminatedString(ty_str.str);
+    const name_with_null = name.ptr[0 .. name.len + 1];
+    const ty = emit.mir.function.air.getRefType(ty_str.ty);
+
+    switch (mcv) {
+        .register => |reg| {
+            switch (emit.debug_output) {
+                .dwarf => |dbg_out| {
+                    try dbg_out.dbg_info.ensureUnusedCapacity(3);
+                    dbg_out.dbg_info.appendAssumeCapacity(link.File.Elf.abbrev_parameter);
+                    dbg_out.dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc
+                        1, // ULEB128 dwarf expression length
+                        reg.dwarfLocOp(),
+                    });
+                    try dbg_out.dbg_info.ensureUnusedCapacity(5 + name_with_null.len);
+                    try emit.addDbgInfoTypeReloc(ty); // DW.AT.type,  DW.FORM.ref4
+                    dbg_out.dbg_info.appendSliceAssumeCapacity(name_with_null); // DW.AT.name, DW.FORM.string
+                },
+                .plan9 => {},
+                .none => {},
+            }
+        },
+        .stack_offset => {
+            switch (emit.debug_output) {
+                .dwarf => {},
+                .plan9 => {},
+                .none => {},
+            }
+        },
+        else => {},
+    }
+}
+
+/// Adds a Type to the .debug_info at the current position. The bytes will be populated later,
+/// after codegen for this symbol is done.
+fn addDbgInfoTypeReloc(emit: *Emit, ty: Type) !void {
+    switch (emit.debug_output) {
+        .dwarf => |dbg_out| {
+            assert(ty.hasCodeGenBits());
+            const index = dbg_out.dbg_info.items.len;
+            try dbg_out.dbg_info.resize(index + 4); // DW.AT.type,  DW.FORM.ref4
+
+            const gop = try dbg_out.dbg_info_type_relocs.getOrPut(emit.bin_file.allocator, ty);
+            if (!gop.found_existing) {
+                gop.value_ptr.* = .{
+                    .off = undefined,
+                    .relocs = .{},
+                };
+            }
+            try gop.value_ptr.relocs.append(emit.bin_file.allocator, @intCast(u32, index));
+        },
+        .plan9 => {},
+        .none => {},
+    }
+}
src/arch/x86_64/Mir.zig
@@ -0,0 +1,379 @@
+//! Machine Intermediate Representation.
+//! This data is produced by x86_64 Codegen and consumed by x86_64 Isel.
+//! These instructions have a 1:1 correspondence with machine code instructions
+//! for the target. MIR can be lowered to source-annotated textual assembly code
+//! instructions, or it can be lowered to machine code.
+//! The main purpose of MIR is to postpone the assignment of offsets until Isel,
+//! so that, for example, the smaller encodings of jump instructions can be used.
+
+const Mir = @This();
+const std = @import("std");
+const builtin = @import("builtin");
+const assert = std.debug.assert;
+
+const bits = @import("bits.zig");
+const Air = @import("../../Air.zig");
+const CodeGen = @import("CodeGen.zig");
+const Register = bits.Register;
+
+function: *const CodeGen,
+instructions: std.MultiArrayList(Inst).Slice,
+/// The meaning of this data is determined by `Inst.Tag` value.
+extra: []const u32,
+
+pub const Inst = struct {
+    tag: Tag,
+    /// This packs 3 fields; the meaning of each depends on `tag`.
+    /// reg1: Register
+    /// reg2: Register
+    /// flags: u2
+    ops: u16,
+    /// The meaning of this depends on `tag` and `ops`.
+    data: Data,
+
+    pub const Tag = enum(u16) {
+        /// ops flags:  form:
+        ///       0b00  reg1, reg2
+        ///       0b00  reg1, imm32
+        ///       0b01  reg1, [reg2 + imm32]
+        ///       0b01  reg1, [ds:imm32]
+        ///       0b10  [reg1 + imm32], reg2
+        ///       0b10  [reg1 + 0], imm32
+        ///       0b11  [reg1 + imm32], imm32
+        /// Notes:
+        ///  * If reg2 is `none` then it means Data field `imm` is used as the immediate.
+        ///  * When two imm32 values are required, Data field `payload` points at `ImmPair`.
+        adc,
+
+        /// form: reg1, [reg2 + scale*rcx + imm32]
+        /// ops flags  scale
+        ///      0b00      1
+        ///      0b01      2
+        ///      0b10      4
+        ///      0b11      8
+        adc_scale_src,
+
+        /// form: [reg1 + scale*rax + imm32], reg2
+        /// form: [reg1 + scale*rax + 0], imm32
+        /// ops flags  scale
+        ///      0b00      1
+        ///      0b01      2
+        ///      0b10      4
+        ///      0b11      8
+        /// Notes:
+        ///  * If reg2 is `none` then it means Data field `imm` is used as the immediate.
+        adc_scale_dst,
+
+        /// form: [reg1 + scale*rax + imm32], imm32
+        /// ops flags  scale
+        ///      0b00      1
+        ///      0b01      2
+        ///      0b10      4
+        ///      0b11      8
+        /// Notes:
+        ///  * Data field `payload` points at `ImmPair`.
+        adc_scale_imm,
+
+        // The following instructions all have the same encoding as `adc`.
+
+        add,
+        add_scale_src,
+        add_scale_dst,
+        add_scale_imm,
+        sub,
+        sub_scale_src,
+        sub_scale_dst,
+        sub_scale_imm,
+        xor,
+        xor_scale_src,
+        xor_scale_dst,
+        xor_scale_imm,
+        @"and",
+        and_scale_src,
+        and_scale_dst,
+        and_scale_imm,
+        @"or",
+        or_scale_src,
+        or_scale_dst,
+        or_scale_imm,
+        rol,
+        rol_scale_src,
+        rol_scale_dst,
+        rol_scale_imm,
+        ror,
+        ror_scale_src,
+        ror_scale_dst,
+        ror_scale_imm,
+        rcl,
+        rcl_scale_src,
+        rcl_scale_dst,
+        rcl_scale_imm,
+        rcr,
+        rcr_scale_src,
+        rcr_scale_dst,
+        rcr_scale_imm,
+        shl,
+        shl_scale_src,
+        shl_scale_dst,
+        shl_scale_imm,
+        sal,
+        sal_scale_src,
+        sal_scale_dst,
+        sal_scale_imm,
+        shr,
+        shr_scale_src,
+        shr_scale_dst,
+        shr_scale_imm,
+        sar,
+        sar_scale_src,
+        sar_scale_dst,
+        sar_scale_imm,
+        sbb,
+        sbb_scale_src,
+        sbb_scale_dst,
+        sbb_scale_imm,
+        cmp,
+        cmp_scale_src,
+        cmp_scale_dst,
+        cmp_scale_imm,
+        mov,
+        mov_scale_src,
+        mov_scale_dst,
+        mov_scale_imm,
+        lea,
+        lea_scale_src,
+        lea_scale_dst,
+        lea_scale_imm,
+
+        /// ops flags: form:
+        ///      0bX0  reg1
+        ///      0bX1  [reg1 + imm32]
+        imul,
+        idiv,
+
+        /// ops flags:  form:
+        ///      0b00  reg1, reg2
+        ///      0b01  reg1, [reg2 + imm32]
+        ///      0b01  reg1, [imm32] if reg2 is none
+        ///      0b10  reg1, reg2, imm32
+        ///      0b11  reg1, [reg2 + imm32], imm32
+        imul_complex,
+
+        /// ops flags:  form:
+        ///      0bX0   reg1, [rip + imm32]
+        ///      0bX1   reg1, [rip + reloc]
+        /// Notes:
+        /// * if flags are 0bX1, `Data` contains `got_entry` for the linker
+        ///   to generate a valid relocation for.
+        /// TODO handle more cases
+        lea_rip,
+
+        /// ops flags:  form:
+        ///      0bX0   reg1, imm64
+        ///      0bX1   rax, moffs64
+        /// Notes:
+        ///   * If reg1 is 64-bit, the immediate is 64-bit and stored
+        ///     within extra data `Imm64`.
+        ///   * For 0bX1, reg1 (or reg2) needs to be
+        ///     a version of rax. If reg1 == .none, then reg2 == .rax,
+        ///     or vice versa.
+        /// TODO handle scaling
+        movabs,
+
+        /// ops flags: 0bX0:
+        /// - Uses the `inst` field of Data as the jump target.
+        /// - reg1 and reg2 are ignored.
+        /// ops flags: 0bX1:
+        /// - reg1 is the jump target; reg2 and data are ignored.
+        /// - if reg1 is none, the target is the absolute address in `imm`
+        ///   (jmp/call [imm]).
+        jmp,
+        call,
+
+        /// ops flags:
+        ///     0b00 gte
+        ///     0b01 gt
+        ///     0b10 lt
+        ///     0b11 lte
+        cond_jmp_greater_less,
+        cond_set_byte_greater_less,
+
+        /// ops flags:
+        ///     0b00 above or equal
+        ///     0b01 above
+        ///     0b10 below
+        ///     0b11 below or equal
+        cond_jmp_above_below,
+        cond_set_byte_above_below,
+
+        /// ops flags:
+        ///     0bX0 ne
+        ///     0bX1 eq
+        cond_jmp_eq_ne,
+        cond_set_byte_eq_ne,
+
+        /// ops flags:  form:
+        ///       0b00   reg1
+        ///       0b01   [reg1 + imm32]
+        ///       0b10   imm32
+        /// Notes:
+        ///  * If 0b10 is specified and the tag is push, the immediate is pushed
+        ///    onto the stack using the mnemonic PUSH imm32 (pop has no immediate form).
+        push,
+        pop,
+
+        /// ops flags:  form:
+        ///       0b00  retf imm16
+        ///       0b01  retf
+        ///       0b10  retn imm16
+        ///       0b11  retn
+        ret,
+
+        /// Fast system call
+        syscall,
+
+        /// ops flags:  form:
+        ///       0b00  reg1, reg2
+        ///       0b00  reg1, imm32
+        ///       0b01  reg1, [reg2 + imm32]
+        ///       0b01  reg1, [ds:imm32]
+        ///       0b10  [reg1 + imm32], reg2
+        ///       0b10  [reg1 + 0], imm32
+        ///       0b11  [reg1 + imm32], imm32
+        /// Notes:
+        ///  * If reg2 is `none` then it means Data field `imm` is used as the immediate.
+        ///  * When two imm32 values are required, Data field `payload` points at `ImmPair`.
+        @"test",
+
+        /// Breakpoint
+        brk,
+
+        /// Pseudo-instructions
+        /// call extern function
+        /// Notes:
+        ///   * target of the call is stored as `extern_fn` in `Data` union.
+        call_extern,
+
+        /// end of prologue
+        dbg_prologue_end,
+
+        /// start of epilogue
+        dbg_epilogue_begin,
+
+        /// update debug line
+        dbg_line,
+
+        /// arg debug info
+        arg_dbg_info,
+    };
+
+    /// The position of an MIR instruction within the `Mir` instructions array.
+    pub const Index = u32;
+
+    /// All instructions have a 4-byte payload, which is contained within
+    /// this union. `Tag` determines which union field is active, as well as
+    /// how to interpret the data within.
+    pub const Data = union {
+        /// Another instruction.
+        inst: Index,
+        /// A 32-bit immediate value.
+        imm: i32,
+        /// An extern function.
+        /// Index into the linker's string table.
+        extern_fn: u32,
+        /// Entry in the GOT table by index.
+        got_entry: u32,
+        /// Index into `extra`. Meaning of what can be found there is context-dependent.
+        payload: u32,
+    };
+
+    // Make sure we don't accidentally make instructions bigger than expected.
+    // Note that in Debug builds, Zig is allowed to insert a secret field for safety checks.
+    comptime {
+        if (builtin.mode != .Debug) {
+            assert(@sizeOf(Inst) == 8);
+        }
+    }
+};
+
+pub const ImmPair = struct {
+    dest_off: i32,
+    operand: i32,
+};
+
+pub const Imm64 = struct {
+    msb: u32,
+    lsb: u32,
+
+    pub fn encode(v: u64) Imm64 {
+        return .{
+            .msb = @truncate(u32, v >> 32),
+            .lsb = @truncate(u32, v),
+        };
+    }
+
+    pub fn decode(imm: Imm64) u64 {
+        var res: u64 = 0;
+        res |= (@intCast(u64, imm.msb) << 32);
+        res |= @intCast(u64, imm.lsb);
+        return res;
+    }
+};
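+
+// Illustrative roundtrip check for the split-immediate helpers above.
+test "Imm64 encode/decode roundtrip" {
+    const v: u64 = 0x1122_3344_5566_7788;
+    try std.testing.expectEqual(v, Imm64.encode(v).decode());
+}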
+
+pub const DbgLineColumn = struct {
+    line: u32,
+    column: u32,
+};
+
+pub const ArgDbgInfo = struct {
+    air_inst: Air.Inst.Index,
+    arg_index: u32,
+};
+
+pub fn deinit(mir: *Mir, gpa: *std.mem.Allocator) void {
+    mir.instructions.deinit(gpa);
+    gpa.free(mir.extra);
+    mir.* = undefined;
+}
+
+pub const Ops = struct {
+    reg1: Register = .none,
+    reg2: Register = .none,
+    flags: u2 = 0b00,
+
+    pub fn encode(self: Ops) u16 {
+        var ops: u16 = 0;
+        ops |= @intCast(u16, @enumToInt(self.reg1)) << 9;
+        ops |= @intCast(u16, @enumToInt(self.reg2)) << 2;
+        ops |= self.flags;
+        return ops;
+    }
+
+    pub fn decode(ops: u16) Ops {
+        const reg1 = @intToEnum(Register, @truncate(u7, ops >> 9));
+        const reg2 = @intToEnum(Register, @truncate(u7, ops >> 2));
+        const flags = @truncate(u2, ops);
+        return .{
+            .reg1 = reg1,
+            .reg2 = reg2,
+            .flags = flags,
+        };
+    }
+};
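+
+// Sanity check (illustrative) that encode/decode are inverses for the
+// 7+7+2 bit packing above.
+test "Ops encode/decode roundtrip" {
+    const ops = Ops{ .reg1 = .rax, .reg2 = .r12, .flags = 0b10 };
+    const decoded = Ops.decode(ops.encode());
+    try std.testing.expectEqual(ops.reg1, decoded.reg1);
+    try std.testing.expectEqual(ops.reg2, decoded.reg2);
+    try std.testing.expectEqual(ops.flags, decoded.flags);
+}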
+
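+/// Decodes a `T` from `extra`, starting at `index`. Returns both the decoded
+/// value and the index of the first u32 past it.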
+pub fn extraData(mir: Mir, comptime T: type, index: usize) struct { data: T, end: usize } {
+    const fields = std.meta.fields(T);
+    var i: usize = index;
+    var result: T = undefined;
+    inline for (fields) |field| {
+        @field(result, field.name) = switch (field.field_type) {
+            u32 => mir.extra[i],
+            i32 => @bitCast(i32, mir.extra[i]),
+            else => @compileError("bad field type"),
+        };
+        i += 1;
+    }
+    return .{
+        .data = result,
+        .end = i,
+    };
+}
src/codegen.zig
@@ -117,7 +117,7 @@ pub fn generateFunction(
         //.thumb => return Function(.thumb).generate(bin_file, src_loc, func, air, liveness, code, debug_output),
         //.thumbeb => return Function(.thumbeb).generate(bin_file, src_loc, func, air, liveness, code, debug_output),
         //.i386 => return Function(.i386).generate(bin_file, src_loc, func, air, liveness, code, debug_output),
-        .x86_64 => return @import("arch/x86_64/CodeGen.zig").generate(.x86_64, bin_file, src_loc, func, air, liveness, code, debug_output),
+        .x86_64 => return @import("arch/x86_64/CodeGen.zig").generate(bin_file, src_loc, func, air, liveness, code, debug_output),
         //.xcore => return Function(.xcore).generate(bin_file, src_loc, func, air, liveness, code, debug_output),
         //.nvptx => return Function(.nvptx).generate(bin_file, src_loc, func, air, liveness, code, debug_output),
         //.nvptx64 => return Function(.nvptx64).generate(bin_file, src_loc, func, air, liveness, code, debug_output),