Commit 08ea1a2eab

Jakub Konka <kubkon@jakubkonka.com>
2021-12-29 13:38:49
stage2: add separate tag for MI encoding
To request memory-immediate (MI) encoding on the MIR side, we should now use a new tag such as `mov_mem_imm`, where the size of the memory pointer is encoded in the flags:

```
0b00 => .byte_ptr,
0b01 => .word_ptr,
0b10 => .dword_ptr,
0b11 => .qword_ptr,
```
1 parent be5130e
Changed files (4)
src/arch/x86_64/CodeGen.zig
@@ -1646,7 +1646,7 @@ fn genBinMathOpMir(
                     _ = try self.addInst(.{
                         .tag = mir_tag,
                         .ops = (Mir.Ops{
-                            .reg1 = registerAlias(dst_reg, 4),
+                            .reg1 = dst_reg.to32(),
                         }).encode(),
                         .data = .{ .imm = @intCast(i32, imm) },
                     });
@@ -2807,10 +2807,10 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
                         .operand = @bitCast(i32, @intCast(u32, x_big)),
                     });
                     _ = try self.addInst(.{
-                        .tag = .mov,
+                        .tag = .mov_mem_imm,
                         .ops = (Mir.Ops{
                             .reg1 = .rbp,
-                            .flags = 0b11,
+                            .flags = 0b10,
                         }).encode(),
                         .data = .{ .payload = payload },
                     });
@@ -2828,10 +2828,10 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
                             .operand = @bitCast(i32, @truncate(u32, x_big >> 32)),
                         });
                         _ = try self.addInst(.{
-                            .tag = .mov,
+                            .tag = .mov_mem_imm,
                             .ops = (Mir.Ops{
                                 .reg1 = .rbp,
-                                .flags = 0b11,
+                                .flags = 0b10,
                             }).encode(),
                             .data = .{ .payload = payload },
                         });
@@ -2842,10 +2842,10 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
                             .operand = @bitCast(i32, @truncate(u32, x_big)),
                         });
                         _ = try self.addInst(.{
-                            .tag = .mov,
+                            .tag = .mov_mem_imm,
                             .ops = (Mir.Ops{
                                 .reg1 = .rbp,
-                                .flags = 0b11,
+                                .flags = 0b10,
                             }).encode(),
                             .data = .{ .payload = payload },
                         });
@@ -2955,11 +2955,10 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
             }
             if (x <= math.maxInt(i32)) {
                 // Next best case: if we set the lower four bytes, the upper four will be zeroed.
-                // TODO I am not quite sure why we need to set the size of the register here...
                 _ = try self.addInst(.{
                     .tag = .mov,
                     .ops = (Mir.Ops{
-                        .reg1 = registerAlias(reg, 4),
+                        .reg1 = reg.to32(),
                     }).encode(),
                     .data = .{ .imm = @intCast(i32, x) },
                 });
@@ -2985,9 +2984,10 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
             // We need the offset from RIP in a signed i32 twos complement.
             const payload = try self.addExtra(Mir.Imm64.encode(code_offset));
             _ = try self.addInst(.{
-                .tag = .lea_rip,
+                .tag = .lea,
                 .ops = (Mir.Ops{
                     .reg1 = reg,
+                    .flags = 0b01,
                 }).encode(),
                 .data = .{ .payload = payload },
             });
@@ -3011,10 +3011,10 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
             if (self.bin_file.options.pie) {
                 // TODO we should flag up `x` as GOT symbol entry explicitly rather than as a hack.
                 _ = try self.addInst(.{
-                    .tag = .lea_rip,
+                    .tag = .lea,
                     .ops = (Mir.Ops{
                         .reg1 = reg,
-                        .flags = 0b01,
+                        .flags = 0b10,
                     }).encode(),
                     .data = .{ .got_entry = @intCast(u32, x) },
                 });
src/arch/x86_64/Emit.zig
@@ -76,6 +76,16 @@ pub fn emitMir(emit: *Emit) InnerError!void {
             .cmp => try emit.mirArith(.cmp, inst),
             .mov => try emit.mirArith(.mov, inst),
 
+            .adc_mem_imm => try emit.mirArithMemImm(.adc, inst),
+            .add_mem_imm => try emit.mirArithMemImm(.add, inst),
+            .sub_mem_imm => try emit.mirArithMemImm(.sub, inst),
+            .xor_mem_imm => try emit.mirArithMemImm(.xor, inst),
+            .and_mem_imm => try emit.mirArithMemImm(.@"and", inst),
+            .or_mem_imm => try emit.mirArithMemImm(.@"or", inst),
+            .sbb_mem_imm => try emit.mirArithMemImm(.sbb, inst),
+            .cmp_mem_imm => try emit.mirArithMemImm(.cmp, inst),
+            .mov_mem_imm => try emit.mirArithMemImm(.mov, inst),
+
             .adc_scale_src => try emit.mirArithScaleSrc(.adc, inst),
             .add_scale_src => try emit.mirArithScaleSrc(.add, inst),
             .sub_scale_src => try emit.mirArithScaleSrc(.sub, inst),
@@ -109,7 +119,6 @@ pub fn emitMir(emit: *Emit) InnerError!void {
             .movabs => try emit.mirMovabs(inst),
 
             .lea => try emit.mirLea(inst),
-            .lea_rip => try emit.mirLeaRip(inst),
 
             .imul_complex => try emit.mirIMulComplex(inst),
 
@@ -170,6 +179,14 @@ fn fail(emit: *Emit, comptime format: []const u8, args: anytype) InnerError {
     return error.EmitFail;
 }
 
+fn failWithLoweringError(emit: *Emit, err: LoweringError) InnerError {
+    return switch (err) {
+        error.RaxOperandExpected => emit.fail("Register.rax expected as destination operand", .{}),
+        error.OperandSizeMismatch => emit.fail("operand size mismatch", .{}),
+        else => |e| e,
+    };
+}
+
 fn fixupRelocs(emit: *Emit) InnerError!void {
     // TODO this function currently assumes all relocs via JMP/CALL instructions are 32bit in size.
     // This should be reversed like it is done in aarch64 MIR emit code: start with the smallest
@@ -185,15 +202,15 @@ fn fixupRelocs(emit: *Emit) InnerError!void {
 }
 
 fn mirBrk(emit: *Emit) InnerError!void {
-    return lowerToZoEnc(.brk, emit.code);
+    return lowerToZoEnc(.brk, emit.code) catch |err| emit.failWithLoweringError(err);
 }
 
 fn mirNop(emit: *Emit) InnerError!void {
-    return lowerToZoEnc(.nop, emit.code);
+    return lowerToZoEnc(.nop, emit.code) catch |err| emit.failWithLoweringError(err);
 }
 
 fn mirSyscall(emit: *Emit) InnerError!void {
-    return lowerToZoEnc(.syscall, emit.code);
+    return lowerToZoEnc(.syscall, emit.code) catch |err| emit.failWithLoweringError(err);
 }
 
 fn mirPushPop(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
@@ -201,18 +218,24 @@ fn mirPushPop(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
     switch (ops.flags) {
         0b00 => {
             // PUSH/POP reg
-            return lowerToOEnc(tag, ops.reg1, emit.code);
+            return lowerToOEnc(tag, ops.reg1, emit.code) catch |err| emit.failWithLoweringError(err);
         },
         0b01 => {
             // PUSH/POP r/m64
             const imm = emit.mir.instructions.items(.data)[inst].imm;
-            return lowerToMEnc(tag, RegisterOrMemory.mem(ops.reg1, imm), emit.code);
+            const ptr_size: Memory.PtrSize = switch (immOpSize(imm)) {
+                16 => .word_ptr,
+                else => .qword_ptr,
+            };
+            return lowerToMEnc(tag, RegisterOrMemory.mem(ops.reg1, imm, ptr_size), emit.code) catch |err|
+                emit.failWithLoweringError(err);
         },
         0b10 => {
             // PUSH imm32
             assert(tag == .push);
             const imm = emit.mir.instructions.items(.data)[inst].imm;
-            return lowerToIEnc(.push, imm, emit.code);
+            return lowerToIEnc(.push, imm, emit.code) catch |err|
+                emit.failWithLoweringError(err);
         },
         0b11 => unreachable,
     }
@@ -223,7 +246,8 @@ fn mirPushPopRegsFromCalleePreservedRegs(emit: *Emit, tag: Tag, inst: Mir.Inst.I
     if (tag == .push) {
         for (callee_preserved_regs) |reg, i| {
             if ((regs >> @intCast(u5, i)) & 1 == 0) continue;
-            try lowerToOEnc(.push, reg, emit.code);
+            lowerToOEnc(.push, reg, emit.code) catch |err|
+                return emit.failWithLoweringError(err);
         }
     } else {
         // pop in the reverse direction
@@ -231,7 +255,8 @@ fn mirPushPopRegsFromCalleePreservedRegs(emit: *Emit, tag: Tag, inst: Mir.Inst.I
         while (i > 0) : (i -= 1) {
             const reg = callee_preserved_regs[i - 1];
             if ((regs >> @intCast(u5, i - 1)) & 1 == 0) continue;
-            try lowerToOEnc(.pop, reg, emit.code);
+            lowerToOEnc(.pop, reg, emit.code) catch |err|
+                return emit.failWithLoweringError(err);
         }
     }
 }
@@ -242,7 +267,8 @@ fn mirJmpCall(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
     if (flag == 0) {
         const target = emit.mir.instructions.items(.data)[inst].inst;
         const source = emit.code.items.len;
-        try lowerToDEnc(tag, 0, emit.code);
+        lowerToDEnc(tag, 0, emit.code) catch |err|
+            return emit.failWithLoweringError(err);
         try emit.relocs.append(emit.bin_file.allocator, .{
             .source = source,
             .target = target,
@@ -254,10 +280,15 @@ fn mirJmpCall(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
     if (ops.reg1 == .none) {
         // JMP/CALL [imm]
         const imm = emit.mir.instructions.items(.data)[inst].imm;
-        return lowerToMEnc(tag, RegisterOrMemory.mem(null, imm), emit.code);
+        const ptr_size: Memory.PtrSize = switch (immOpSize(imm)) {
+            16 => .word_ptr,
+            else => .qword_ptr,
+        };
+        return lowerToMEnc(tag, RegisterOrMemory.mem(null, imm, ptr_size), emit.code) catch |err|
+            emit.failWithLoweringError(err);
     }
     // JMP/CALL reg
-    return lowerToMEnc(tag, RegisterOrMemory.reg(ops.reg1), emit.code);
+    return lowerToMEnc(tag, RegisterOrMemory.reg(ops.reg1), emit.code) catch |err| emit.failWithLoweringError(err);
 }
 
 fn mirCondJmp(emit: *Emit, mir_tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void {
@@ -283,7 +314,8 @@ fn mirCondJmp(emit: *Emit, mir_tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerErr
         else => unreachable,
     };
     const source = emit.code.items.len;
-    try lowerToDEnc(tag, 0, emit.code);
+    lowerToDEnc(tag, 0, emit.code) catch |err|
+        return emit.failWithLoweringError(err);
     try emit.relocs.append(emit.bin_file.allocator, .{
         .source = source,
         .target = target,
@@ -313,7 +345,8 @@ fn mirCondSetByte(emit: *Emit, mir_tag: Mir.Inst.Tag, inst: Mir.Inst.Index) Inne
         },
         else => unreachable,
     };
-    return lowerToMEnc(tag, RegisterOrMemory.reg(ops.reg1), emit.code);
+    return lowerToMEnc(tag, RegisterOrMemory.reg(ops.reg1.to8()), emit.code) catch |err|
+        emit.failWithLoweringError(err);
 }
 
 fn mirTest(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
@@ -329,9 +362,11 @@ fn mirTest(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
                 if (ops.reg1.to64() == .rax) {
                     // TEST rax, imm32
                     // I
-                    return lowerToIEnc(.@"test", imm, emit.code);
+                    return lowerToIEnc(.@"test", imm, emit.code) catch |err|
+                        emit.failWithLoweringError(err);
                 }
-                return lowerToMiEnc(.@"test", RegisterOrMemory.reg(ops.reg1), imm, emit.code);
+                return lowerToMiEnc(.@"test", RegisterOrMemory.reg(ops.reg1), imm, emit.code) catch |err|
+                    emit.failWithLoweringError(err);
             }
             // TEST r/m64, r64
             return emit.fail("TODO TEST r/m64, r64", .{});
@@ -349,207 +384,682 @@ fn mirRet(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
             // RETF imm16
             // I
             const imm = emit.mir.instructions.items(.data)[inst].imm;
-            return lowerToIEnc(.ret_far, imm, emit.code);
+            return lowerToIEnc(.ret_far, imm, emit.code) catch |err| emit.failWithLoweringError(err);
+        },
+        0b01 => {
+            return lowerToZoEnc(.ret_far, emit.code) catch |err| emit.failWithLoweringError(err);
         },
-        0b01 => return lowerToZoEnc(.ret_far, emit.code),
         0b10 => {
             // RET imm16
             // I
             const imm = emit.mir.instructions.items(.data)[inst].imm;
-            return lowerToIEnc(.ret_near, imm, emit.code);
+            return lowerToIEnc(.ret_near, imm, emit.code) catch |err| emit.failWithLoweringError(err);
+        },
+        0b11 => {
+            return lowerToZoEnc(.ret_near, emit.code) catch |err| emit.failWithLoweringError(err);
         },
-        0b11 => return lowerToZoEnc(.ret_near, emit.code),
     }
 }
 
-const Tag = enum {
-    adc,
-    add,
-    sub,
-    xor,
-    @"and",
-    @"or",
-    sbb,
-    cmp,
-    mov,
-    lea,
-    jmp_near,
-    call_near,
-    push,
-    pop,
-    @"test",
-    brk,
-    nop,
-    imul,
-    syscall,
-    ret_near,
-    ret_far,
-    jo,
-    jno,
-    jb,
-    jbe,
-    jc,
-    jnae,
-    jnc,
-    jae,
-    je,
-    jz,
-    jne,
-    jnz,
-    jna,
-    jnb,
-    jnbe,
-    ja,
-    js,
-    jns,
-    jpe,
-    jp,
-    jpo,
-    jnp,
-    jnge,
-    jl,
-    jge,
-    jnl,
-    jle,
-    jng,
-    jg,
-    jnle,
-    seto,
-    setno,
-    setb,
-    setc,
-    setnae,
-    setnb,
-    setnc,
-    setae,
-    sete,
-    setz,
-    setne,
-    setnz,
-    setbe,
-    setna,
-    seta,
-    setnbe,
-    sets,
-    setns,
-    setp,
-    setpe,
-    setnp,
-    setop,
-    setl,
-    setnge,
-    setnl,
-    setge,
-    setle,
-    setng,
-    setnle,
-    setg,
-
-    fn isSetCC(tag: Tag) bool {
-        return switch (tag) {
-            .seto,
-            .setno,
-            .setb,
-            .setc,
-            .setnae,
-            .setnb,
-            .setnc,
-            .setae,
-            .sete,
-            .setz,
-            .setne,
-            .setnz,
-            .setbe,
-            .setna,
-            .seta,
-            .setnbe,
-            .sets,
-            .setns,
-            .setp,
-            .setpe,
-            .setnp,
-            .setop,
-            .setl,
-            .setnge,
-            .setnl,
-            .setge,
-            .setle,
-            .setng,
-            .setnle,
-            .setg,
-            => true,
-            else => false,
-        };
+fn mirArith(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    switch (ops.flags) {
+        0b00 => {
+            if (ops.reg2 == .none) {
+                // mov reg1, imm32
+                // MI
+                const imm = emit.mir.instructions.items(.data)[inst].imm;
+                return lowerToMiEnc(tag, RegisterOrMemory.reg(ops.reg1), imm, emit.code) catch |err|
+                    emit.failWithLoweringError(err);
+            }
+            // mov reg1, reg2
+            // RM
+            return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code) catch |err|
+                emit.failWithLoweringError(err);
+        },
+        0b01 => {
+            // mov reg1, [reg2 + imm32]
+            // RM
+            const imm = emit.mir.instructions.items(.data)[inst].imm;
+            const src_reg: ?Register = if (ops.reg2 == .none) null else ops.reg2;
+            return lowerToRmEnc(
+                tag,
+                ops.reg1,
+                RegisterOrMemory.mem(src_reg, imm, Memory.PtrSize.fromBits(ops.reg1.size())),
+                emit.code,
+            ) catch |err| emit.failWithLoweringError(err);
+        },
+        0b10 => {
+            if (ops.reg2 == .none) {
+                return emit.fail("TODO unused variant: mov reg1, none, 0b10", .{});
+            }
+            // mov [reg1 + imm32], reg2
+            // MR
+            const imm = emit.mir.instructions.items(.data)[inst].imm;
+            return lowerToMrEnc(
+                tag,
+                RegisterOrMemory.mem(ops.reg1, imm, Memory.PtrSize.fromBits(ops.reg2.size())),
+                ops.reg2,
+                emit.code,
+            ) catch |err| emit.failWithLoweringError(err);
+        },
+        0b11 => {
+            return emit.fail("TODO unused variant: mov reg1, reg2, 0b11", .{});
+        },
     }
-};
-
-const Encoding = enum {
-    /// OP
-    zo,
-
-    /// OP rel32
-    d,
-
-    /// OP r/m64
-    m,
-
-    /// OP r64
-    o,
-
-    /// OP imm32
-    i,
-
-    /// OP r/m64, imm32
-    mi,
-
-    /// OP r/m64, r64
-    mr,
-
-    /// OP r64, r/m64
-    rm,
-
-    /// OP r64, imm64
-    oi,
-
-    /// OP al/ax/eax/rax, moffs
-    fd,
-
-    /// OP moffs, al/ax/eax/rax
-    td,
-
-    /// OP r64, r/m64, imm32
-    rmi,
-};
+}
 
-const OpCode = union(enum) {
-    one_byte: u8,
-    two_byte: struct { _1: u8, _2: u8 },
+fn mirArithMemImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    assert(ops.reg2 == .none);
+    const payload = emit.mir.instructions.items(.data)[inst].payload;
+    const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data;
+    const ptr_size: Memory.PtrSize = switch (ops.flags) {
+        0b00 => .byte_ptr,
+        0b01 => .word_ptr,
+        0b10 => .dword_ptr,
+        0b11 => .qword_ptr,
+    };
+    return lowerToMiEnc(
+        tag,
+        RegisterOrMemory.mem(ops.reg1, imm_pair.dest_off, ptr_size),
+        imm_pair.operand,
+        emit.code,
+    ) catch |err| emit.failWithLoweringError(err);
+}
 
-    fn oneByte(opc: u8) OpCode {
-        return .{ .one_byte = opc };
+fn immOpSize(imm: i64) u8 {
+    blk: {
+        _ = math.cast(i8, imm) catch break :blk;
+        return 8;
     }
-
-    fn twoByte(opc1: u8, opc2: u8) OpCode {
-        return .{ .two_byte = .{ ._1 = opc1, ._2 = opc2 } };
+    blk: {
+        _ = math.cast(i16, imm) catch break :blk;
+        return 16;
     }
-
-    fn encode(opc: OpCode, encoder: Encoder) void {
-        switch (opc) {
-            .one_byte => |v| encoder.opcode_1byte(v),
-            .two_byte => |v| encoder.opcode_2byte(v._1, v._2),
-        }
+    blk: {
+        _ = math.cast(i32, imm) catch break :blk;
+        return 32;
     }
+    return 64;
+}
 
-    fn encodeWithReg(opc: OpCode, encoder: Encoder, reg: Register) void {
-        assert(opc == .one_byte);
-        encoder.opcode_withReg(opc.one_byte, reg.lowId());
+// TODO
+fn mirArithScaleSrc(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    const scale = ops.flags;
+    // OP reg1, [reg2 + scale*rcx + imm32]
+    const opc = getOpCode(tag, .rm, ops.reg1.size() == 8).?;
+    const imm = emit.mir.instructions.items(.data)[inst].imm;
+    const encoder = try Encoder.init(emit.code, 8);
+    encoder.rex(.{
+        .w = ops.reg1.size() == 64,
+        .r = ops.reg1.isExtended(),
+        .b = ops.reg2.isExtended(),
+    });
+    opc.encode(encoder);
+    if (imm <= math.maxInt(i8)) {
+        encoder.modRm_SIBDisp8(ops.reg1.lowId());
+        encoder.sib_scaleIndexBaseDisp8(scale, Register.rcx.lowId(), ops.reg2.lowId());
+        encoder.disp8(@intCast(i8, imm));
+    } else {
+        encoder.modRm_SIBDisp32(ops.reg1.lowId());
+        encoder.sib_scaleIndexBaseDisp32(scale, Register.rcx.lowId(), ops.reg2.lowId());
+        encoder.disp32(imm);
     }
-};
+}
 
-inline fn getOpCode(tag: Tag, enc: Encoding, is_one_byte: bool) ?OpCode {
-    switch (enc) {
-        .zo => return switch (tag) {
-            .ret_near => OpCode.oneByte(0xc3),
+// TODO
+fn mirArithScaleDst(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    const scale = ops.flags;
+    const imm = emit.mir.instructions.items(.data)[inst].imm;
+
+    if (ops.reg2 == .none) {
+        // OP [reg1 + scale*rax + 0], imm32
+        const opc = getOpCode(tag, .mi, ops.reg1.size() == 8).?;
+        const modrm_ext = getModRmExt(tag).?;
+        const encoder = try Encoder.init(emit.code, 8);
+        encoder.rex(.{
+            .w = ops.reg1.size() == 64,
+            .b = ops.reg1.isExtended(),
+        });
+        opc.encode(encoder);
+        encoder.modRm_SIBDisp0(modrm_ext);
+        encoder.sib_scaleIndexBase(scale, Register.rax.lowId(), ops.reg1.lowId());
+        if (imm <= math.maxInt(i8)) {
+            encoder.imm8(@intCast(i8, imm));
+        } else if (imm <= math.maxInt(i16)) {
+            encoder.imm16(@intCast(i16, imm));
+        } else {
+            encoder.imm32(imm);
+        }
+        return;
+    }
+
+    // OP [reg1 + scale*rax + imm32], reg2
+    const opc = getOpCode(tag, .mr, ops.reg1.size() == 8).?;
+    const encoder = try Encoder.init(emit.code, 8);
+    encoder.rex(.{
+        .w = ops.reg1.size() == 64,
+        .r = ops.reg2.isExtended(),
+        .b = ops.reg1.isExtended(),
+    });
+    opc.encode(encoder);
+    if (imm <= math.maxInt(i8)) {
+        encoder.modRm_SIBDisp8(ops.reg2.lowId());
+        encoder.sib_scaleIndexBaseDisp8(scale, Register.rax.lowId(), ops.reg1.lowId());
+        encoder.disp8(@intCast(i8, imm));
+    } else {
+        encoder.modRm_SIBDisp32(ops.reg2.lowId());
+        encoder.sib_scaleIndexBaseDisp32(scale, Register.rax.lowId(), ops.reg1.lowId());
+        encoder.disp32(imm);
+    }
+}
+
+// TODO
+fn mirArithScaleImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    const scale = ops.flags;
+    const payload = emit.mir.instructions.items(.data)[inst].payload;
+    const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data;
+    const opc = getOpCode(tag, .mi, ops.reg1.size() == 8).?;
+    const modrm_ext = getModRmExt(tag).?;
+    const encoder = try Encoder.init(emit.code, 2);
+    encoder.rex(.{
+        .w = ops.reg1.size() == 64,
+        .b = ops.reg1.isExtended(),
+    });
+    opc.encode(encoder);
+    if (imm_pair.dest_off <= math.maxInt(i8)) {
+        encoder.modRm_SIBDisp8(modrm_ext);
+        encoder.sib_scaleIndexBaseDisp8(scale, Register.rax.lowId(), ops.reg1.lowId());
+        encoder.disp8(@intCast(i8, imm_pair.dest_off));
+    } else {
+        encoder.modRm_SIBDisp32(modrm_ext);
+        encoder.sib_scaleIndexBaseDisp32(scale, Register.rax.lowId(), ops.reg1.lowId());
+        encoder.disp32(imm_pair.dest_off);
+    }
+    encoder.imm32(imm_pair.operand);
+}
+
+fn mirMovabs(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .movabs);
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    const imm: i64 = if (ops.reg1.size() == 64) blk: {
+        const payload = emit.mir.instructions.items(.data)[inst].payload;
+        const imm = emit.mir.extraData(Mir.Imm64, payload).data;
+        break :blk @bitCast(i64, imm.decode());
+    } else emit.mir.instructions.items(.data)[inst].imm;
+    if (ops.flags == 0b00) {
+        // movabs reg, imm64
+        // OI
+        return lowerToOiEnc(.mov, ops.reg1, imm, emit.code) catch |err| emit.failWithLoweringError(err);
+    }
+    if (ops.reg1 == .none) {
+        // movabs moffs64, rax
+        // TD
+        return lowerToTdEnc(.mov, imm, ops.reg2, emit.code) catch |err| emit.failWithLoweringError(err);
+    }
+    // movabs rax, moffs64
+    // FD
+    return lowerToFdEnc(.mov, ops.reg1, imm, emit.code) catch |err| emit.failWithLoweringError(err);
+}
+
+fn mirIMulComplex(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .imul_complex);
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    switch (ops.flags) {
+        0b00 => {
+            return lowerToRmEnc(.imul, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code) catch |err|
+                emit.failWithLoweringError(err);
+        },
+        0b10 => {
+            const imm = emit.mir.instructions.items(.data)[inst].imm;
+            return lowerToRmiEnc(.imul, ops.reg1, RegisterOrMemory.reg(ops.reg2), imm, emit.code) catch |err|
+                emit.failWithLoweringError(err);
+        },
+        else => return emit.fail("TODO implement imul", .{}),
+    }
+}
+
+fn mirLea(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .lea);
+    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    switch (ops.flags) {
+        0b00 => {
+            // lea reg1, [reg2 + imm32]
+            // RM
+            const imm = emit.mir.instructions.items(.data)[inst].imm;
+            const src_reg: ?Register = if (ops.reg2 == .none) null else ops.reg2;
+            return lowerToRmEnc(
+                .lea,
+                ops.reg1,
+                RegisterOrMemory.mem(src_reg, imm, Memory.PtrSize.fromBits(ops.reg1.size())),
+                emit.code,
+            ) catch |err| emit.failWithLoweringError(err);
+        },
+        0b01 => {
+            // lea reg1, [rip + imm32]
+            // RM
+            const start_offset = emit.code.items.len;
+            lowerToRmEnc(
+                .lea,
+                ops.reg1,
+                RegisterOrMemory.rip(0, Memory.PtrSize.fromBits(ops.reg1.size())),
+                emit.code,
+            ) catch |err| return emit.failWithLoweringError(err);
+            const end_offset = emit.code.items.len;
+            // Backpatch the displacement
+            const payload = emit.mir.instructions.items(.data)[inst].payload;
+            const imm = emit.mir.extraData(Mir.Imm64, payload).data.decode();
+            const disp = @intCast(i32, @intCast(i64, imm) - @intCast(i64, end_offset - start_offset));
+            mem.writeIntLittle(i32, emit.code.items[end_offset - 4 ..][0..4], disp);
+        },
+        0b10 => {
+            // lea reg1, [rip + reloc]
+            // RM
+            lowerToRmEnc(
+                .lea,
+                ops.reg1,
+                RegisterOrMemory.rip(0, Memory.PtrSize.fromBits(ops.reg1.size())),
+                emit.code,
+            ) catch |err| return emit.failWithLoweringError(err);
+            const end_offset = emit.code.items.len;
+            const got_entry = emit.mir.instructions.items(.data)[inst].got_entry;
+            if (emit.bin_file.cast(link.File.MachO)) |macho_file| {
+                // TODO I think the reloc might be in the wrong place.
+                const decl = macho_file.active_decl.?;
+                try decl.link.macho.relocs.append(emit.bin_file.allocator, .{
+                    .offset = @intCast(u32, end_offset - 4),
+                    .target = .{ .local = got_entry },
+                    .addend = 0,
+                    .subtractor = null,
+                    .pcrel = true,
+                    .length = 2,
+                    .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_GOT),
+                });
+            } else {
+                return emit.fail(
+                    "TODO implement lea reg, [rip + reloc] for linking backends different than MachO",
+                    .{},
+                );
+            }
+        },
+        0b11 => return emit.fail("TODO unused variant lea reg1, reg2, 0b11", .{}),
+    }
+}
+
+fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .call_extern);
+    const n_strx = emit.mir.instructions.items(.data)[inst].extern_fn;
+    const offset = blk: {
+        // callq
+        lowerToDEnc(.call_near, 0, emit.code) catch |err|
+            return emit.failWithLoweringError(err);
+        break :blk @intCast(u32, emit.code.items.len) - 4;
+    };
+    if (emit.bin_file.cast(link.File.MachO)) |macho_file| {
+        // Add relocation to the decl.
+        try macho_file.active_decl.?.link.macho.relocs.append(emit.bin_file.allocator, .{
+            .offset = offset,
+            .target = .{ .global = n_strx },
+            .addend = 0,
+            .subtractor = null,
+            .pcrel = true,
+            .length = 2,
+            .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_BRANCH),
+        });
+    } else {
+        return emit.fail("TODO implement call_extern for linking backends different than MachO", .{});
+    }
+}
+
+fn mirDbgLine(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .dbg_line);
+    const payload = emit.mir.instructions.items(.data)[inst].payload;
+    const dbg_line_column = emit.mir.extraData(Mir.DbgLineColumn, payload).data;
+    try emit.dbgAdvancePCAndLine(dbg_line_column.line, dbg_line_column.column);
+}
+
+fn dbgAdvancePCAndLine(emit: *Emit, line: u32, column: u32) InnerError!void {
+    const delta_line = @intCast(i32, line) - @intCast(i32, emit.prev_di_line);
+    const delta_pc: usize = emit.code.items.len - emit.prev_di_pc;
+    switch (emit.debug_output) {
+        .dwarf => |dbg_out| {
+            // TODO Look into using the DWARF special opcodes to compress this data.
+            // It lets you emit single-byte opcodes that add different numbers to
+            // both the PC and the line number at the same time.
+            try dbg_out.dbg_line.ensureUnusedCapacity(11);
+            dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.advance_pc);
+            leb128.writeULEB128(dbg_out.dbg_line.writer(), delta_pc) catch unreachable;
+            if (delta_line != 0) {
+                dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.advance_line);
+                leb128.writeILEB128(dbg_out.dbg_line.writer(), delta_line) catch unreachable;
+            }
+            dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.copy);
+            emit.prev_di_pc = emit.code.items.len;
+            emit.prev_di_line = line;
+            emit.prev_di_column = column;
+            emit.prev_di_pc = emit.code.items.len;
+        },
+        .plan9 => |dbg_out| {
+            if (delta_pc <= 0) return; // only do this when the pc changes
+            // we have already checked the target in the linker to make sure it is compatable
+            const quant = @import("../../link/Plan9/aout.zig").getPCQuant(emit.target.cpu.arch) catch unreachable;
+
+            // increasing the line number
+            try @import("../../link/Plan9.zig").changeLine(dbg_out.dbg_line, delta_line);
+            // increasing the pc
+            const d_pc_p9 = @intCast(i64, delta_pc) - quant;
+            if (d_pc_p9 > 0) {
+                // minus one because if its the last one, we want to leave space to change the line which is one quanta
+                try dbg_out.dbg_line.append(@intCast(u8, @divExact(d_pc_p9, quant) + 128) - quant);
+                if (dbg_out.pcop_change_index.*) |pci|
+                    dbg_out.dbg_line.items[pci] += 1;
+                dbg_out.pcop_change_index.* = @intCast(u32, dbg_out.dbg_line.items.len - 1);
+            } else if (d_pc_p9 == 0) {
+                // we don't need to do anything, because adding the quant does it for us
+            } else unreachable;
+            if (dbg_out.start_line.* == null)
+                dbg_out.start_line.* = emit.prev_di_line;
+            dbg_out.end_line.* = line;
+            // only do this if the pc changed
+            emit.prev_di_line = line;
+            emit.prev_di_column = column;
+            emit.prev_di_pc = emit.code.items.len;
+        },
+        .none => {},
+    }
+}
+
+fn mirDbgPrologueEnd(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { // Lowers a `dbg_prologue_end` MIR instruction into debug line output.
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .dbg_prologue_end); // this handler must only be reached for this tag
+    switch (emit.debug_output) {
+        .dwarf => |dbg_out| {
+            try dbg_out.dbg_line.append(DW.LNS.set_prologue_end); // append the set_prologue_end opcode to the line program
+            try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); // advance using the previously recorded line/column (position itself is unchanged)
+        },
+        .plan9 => {}, // nothing is emitted for Plan 9
+        .none => {}, // nothing is emitted when there is no debug output
+    }
+}
+
+fn mirDbgEpilogueBegin(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { // Lowers a `dbg_epilogue_begin` MIR instruction into debug line output.
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .dbg_epilogue_begin); // this handler must only be reached for this tag
+    switch (emit.debug_output) {
+        .dwarf => |dbg_out| {
+            try dbg_out.dbg_line.append(DW.LNS.set_epilogue_begin); // append the set_epilogue_begin opcode to the line program
+            try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); // advance using the previously recorded line/column (position itself is unchanged)
+        },
+        .plan9 => {}, // nothing is emitted for Plan 9
+        .none => {}, // nothing is emitted when there is no debug output
+    }
+}
+
+fn mirArgDbgInfo(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { // Lowers an `arg_dbg_info` MIR instruction by emitting debug info for one function argument.
+    const tag = emit.mir.instructions.items(.tag)[inst];
+    assert(tag == .arg_dbg_info); // this handler must only be reached for this tag
+    const payload = emit.mir.instructions.items(.data)[inst].payload; // index into the MIR extra data
+    const arg_dbg_info = emit.mir.extraData(Mir.ArgDbgInfo, payload).data; // decoded record: arg_index + air_inst
+    const mcv = emit.mir.function.args[arg_dbg_info.arg_index]; // machine-code value recorded for that argument
+    try emit.genArgDbgInfo(arg_dbg_info.air_inst, mcv);
+}
+
+fn genArgDbgInfo(emit: *Emit, inst: Air.Inst.Index, mcv: MCValue) !void { // Emits debug info for the argument introduced by AIR instruction `inst`, located at `mcv`.
+    const ty_str = emit.mir.function.air.instructions.items(.data)[inst].ty_str; // carries both the type ref and the name string index
+    const zir = &emit.mir.function.mod_fn.owner_decl.getFileScope().zir;
+    const name = zir.nullTerminatedString(ty_str.str);
+    const name_with_null = name.ptr[0 .. name.len + 1]; // widen the slice by one byte to include the null terminator
+    const ty = emit.mir.function.air.getRefType(ty_str.ty);
+
+    switch (mcv) {
+        .register => |reg| {
+            switch (emit.debug_output) {
+                .dwarf => |dbg_out| {
+                    try dbg_out.dbg_info.ensureUnusedCapacity(3); // 1 abbrev byte + the 2 location bytes appended below
+                    dbg_out.dbg_info.appendAssumeCapacity(link.File.Elf.abbrev_parameter);
+                    dbg_out.dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc
+                        1, // ULEB128 dwarf expression length
+                        reg.dwarfLocOp(),
+                    });
+                    try dbg_out.dbg_info.ensureUnusedCapacity(5 + name_with_null.len); // enough for the 4-byte type reference and the name appended below
+                    try emit.addDbgInfoTypeReloc(ty); // DW.AT.type,  DW.FORM.ref4
+                    dbg_out.dbg_info.appendSliceAssumeCapacity(name_with_null); // DW.AT.name, DW.FORM.string
+                },
+                .plan9 => {}, // nothing is emitted for Plan 9
+                .none => {}, // nothing is emitted when there is no debug output
+            }
+        },
+        .stack_offset => { // no debug info is emitted for stack-offset arguments in any output mode
+            switch (emit.debug_output) {
+                .dwarf => {},
+                .plan9 => {},
+                .none => {},
+            }
+        },
+        else => {}, // every other MCValue kind is ignored
+    }
+}
+
+/// Reserves 4 bytes at the current end of .debug_info for a reference to `ty` and records that offset
+/// in `dbg_info_type_relocs`. The bytes will be populated later, after codegen for this symbol is done.
+fn addDbgInfoTypeReloc(emit: *Emit, ty: Type) !void {
+    switch (emit.debug_output) {
+        .dwarf => |dbg_out| {
+            assert(ty.hasCodeGenBits());
+            const index = dbg_out.dbg_info.items.len; // offset at which the 4 placeholder bytes start
+            try dbg_out.dbg_info.resize(index + 4); // DW.AT.type,  DW.FORM.ref4
+
+            const gop = try dbg_out.dbg_info_type_relocs.getOrPut(emit.bin_file.allocator, ty); // one entry per distinct type
+            if (!gop.found_existing) {
+                gop.value_ptr.* = .{
+                    .off = undefined, // not assigned here; NOTE(review): presumably set when the type itself is emitted - confirm
+                    .relocs = .{},
+                };
+            }
+            try gop.value_ptr.relocs.append(emit.bin_file.allocator, @intCast(u32, index)); // remember this placeholder's offset
+        },
+        .plan9 => {}, // nothing is recorded for Plan 9
+        .none => {}, // nothing is recorded when there is no debug output
+    }
+}
+
+const Tag = enum { // Instruction mnemonics handled by the lowering helpers in this file.
+    adc,
+    add,
+    sub,
+    xor,
+    @"and",
+    @"or",
+    sbb,
+    cmp,
+    mov,
+    lea,
+    jmp_near,
+    call_near,
+    push,
+    pop,
+    @"test",
+    brk,
+    nop,
+    imul,
+    syscall,
+    ret_near,
+    ret_far,
+    jo,
+    jno,
+    jb,
+    jbe,
+    jc,
+    jnae,
+    jnc,
+    jae,
+    je,
+    jz,
+    jne,
+    jnz,
+    jna,
+    jnb,
+    jnbe,
+    ja,
+    js,
+    jns,
+    jpe,
+    jp,
+    jpo,
+    jnp,
+    jnge,
+    jl,
+    jge,
+    jnl,
+    jle,
+    jng,
+    jg,
+    jnle,
+    seto,
+    setno,
+    setb,
+    setc,
+    setnae,
+    setnb,
+    setnc,
+    setae,
+    sete,
+    setz,
+    setne,
+    setnz,
+    setbe,
+    setna,
+    seta,
+    setnbe,
+    sets,
+    setns,
+    setp,
+    setpe,
+    setnp,
+    setop,
+    setl,
+    setnge,
+    setnl,
+    setge,
+    setle,
+    setng,
+    setnle,
+    setg,
+
+    fn isSetCC(tag: Tag) bool { // Returns true exactly for the `set*` tags listed below, false for every other tag.
+        return switch (tag) {
+            .seto,
+            .setno,
+            .setb,
+            .setc,
+            .setnae,
+            .setnb,
+            .setnc,
+            .setae,
+            .sete,
+            .setz,
+            .setne,
+            .setnz,
+            .setbe,
+            .setna,
+            .seta,
+            .setnbe,
+            .sets,
+            .setns,
+            .setp,
+            .setpe,
+            .setnp,
+            .setop,
+            .setl,
+            .setnge,
+            .setnl,
+            .setge,
+            .setle,
+            .setng,
+            .setnle,
+            .setg,
+            => true,
+            else => false,
+        };
+    }
+};
+
+const Encoding = enum { // Operand-encoding forms; each variant's doc comment shows its operand pattern.
+    /// OP
+    zo,
+
+    /// OP rel32
+    d,
+
+    /// OP r/m64
+    m,
+
+    /// OP r64
+    o,
+
+    /// OP imm32
+    i,
+
+    /// OP r/m64, imm32
+    mi,
+
+    /// OP r/m64, r64
+    mr,
+
+    /// OP r64, r/m64
+    rm,
+
+    /// OP r64, imm64
+    oi,
+
+    /// OP al/ax/eax/rax, moffs
+    fd,
+
+    /// OP moffs, al/ax/eax/rax
+    td,
+
+    /// OP r64, r/m64, imm32
+    rmi,
+};
+
+const OpCode = union(enum) { // An instruction opcode that is either one or two bytes long.
+    one_byte: u8,
+    two_byte: struct { _1: u8, _2: u8 },
+
+    fn oneByte(opc: u8) OpCode { // Convenience constructor for the one-byte variant.
+        return .{ .one_byte = opc };
+    }
+
+    fn twoByte(opc1: u8, opc2: u8) OpCode { // Convenience constructor for the two-byte variant.
+        return .{ .two_byte = .{ ._1 = opc1, ._2 = opc2 } };
+    }
+
+    fn encode(opc: OpCode, encoder: Encoder) void { // Writes the opcode through `encoder`, dispatching on its length.
+        switch (opc) {
+            .one_byte => |v| encoder.opcode_1byte(v),
+            .two_byte => |v| encoder.opcode_2byte(v._1, v._2),
+        }
+    }
+
+    fn encodeWithReg(opc: OpCode, encoder: Encoder, reg: Register) void { // Passes the opcode byte and `reg.lowId()` to `encoder.opcode_withReg`.
+        assert(opc == .one_byte); // only one-byte opcodes are supported on this path
+        encoder.opcode_withReg(opc.one_byte, reg.lowId());
+    }
+};
+
+inline fn getOpCode(tag: Tag, enc: Encoding, is_one_byte: bool) ?OpCode {
+    switch (enc) {
+        .zo => return switch (tag) {
+            .ret_near => OpCode.oneByte(0xc3),
             .ret_far => OpCode.oneByte(0xcb),
             .brk => OpCode.oneByte(0xcc),
             .nop => OpCode.oneByte(0x90),
@@ -722,7 +1232,35 @@ const Memory = struct {
     reg: ?Register,
     rip: bool = false,
     disp: i32,
+    ptr_size: PtrSize,
     sib: ?ScaleIndexBase = null,
+
+    const PtrSize = enum { // Width of a memory operand: byte, word, dword or qword.
+        byte_ptr,
+        word_ptr,
+        dword_ptr,
+        qword_ptr,
+
+        fn fromBits(in_bits: u64) PtrSize { // Maps a bit width of 8/16/32/64 to the matching variant.
+            return switch (in_bits) {
+                8 => .byte_ptr,
+                16 => .word_ptr,
+                32 => .dword_ptr,
+                64 => .qword_ptr,
+                else => unreachable, // callers must pass one of the four widths above
+            };
+        }
+
+        /// Returns size in bits.
+        fn size(ptr_size: PtrSize) u64 {
+            return switch (ptr_size) {
+                .byte_ptr => 8,
+                .word_ptr => 16,
+                .dword_ptr => 32,
+                .qword_ptr => 64,
+            };
+        }
+    };
 };
 
 const RegisterOrMemory = union(enum) {
@@ -733,33 +1271,42 @@ const RegisterOrMemory = union(enum) {
         return .{ .register = register };
     }
 
-    fn mem(register: ?Register, disp: i32) RegisterOrMemory {
+    fn mem(register: ?Register, disp: i32, ptr_size: Memory.PtrSize) RegisterOrMemory {
         return .{
             .memory = .{
                 .reg = register,
                 .disp = disp,
+                .ptr_size = ptr_size,
             },
         };
     }
 
-    fn rip(disp: i32) RegisterOrMemory {
+    fn rip(disp: i32, ptr_size: Memory.PtrSize) RegisterOrMemory {
         return .{
             .memory = .{
                 .reg = null,
                 .rip = true,
                 .disp = disp,
+                .ptr_size = ptr_size,
             },
         };
     }
 };
 
-fn lowerToZoEnc(tag: Tag, code: *std.ArrayList(u8)) InnerError!void {
+const LoweringError = error{ // Error set returned by the `lowerTo*Enc` helpers.
+    OutOfMemory,
+    Overflow,
+    OperandSizeMismatch, // returned when an operand's size is not accepted by the chosen encoding
+    RaxOperandExpected,
+};
+
+fn lowerToZoEnc(tag: Tag, code: *std.ArrayList(u8)) LoweringError!void {
     const opc = getOpCode(tag, .zo, false).?;
     const encoder = try Encoder.init(code, 1);
     opc.encode(encoder);
 }
 
-fn lowerToIEnc(tag: Tag, imm: i32, code: *std.ArrayList(u8)) InnerError!void {
+fn lowerToIEnc(tag: Tag, imm: i32, code: *std.ArrayList(u8)) LoweringError!void {
     if (tag == .ret_far or tag == .ret_near) {
         const encoder = try Encoder.init(code, 3);
         const opc = getOpCode(tag, .i, false).?;
@@ -782,928 +1329,543 @@ fn lowerToIEnc(tag: Tag, imm: i32, code: *std.ArrayList(u8)) InnerError!void {
     }
 }
 
-fn lowerToOEnc(tag: Tag, reg: Register, code: *std.ArrayList(u8)) InnerError!void {
-    if (reg.size() != 16 and reg.size() != 64) return error.EmitFail; // TODO correct for push/pop, but is it universal?
-    const opc = getOpCode(tag, .o, false).?;
-    const encoder = try Encoder.init(code, 3);
-    if (reg.size() == 16) {
-        encoder.opcode_1byte(0x66);
-    }
-    encoder.rex(.{
-        .w = false,
-        .b = reg.isExtended(),
-    });
-    opc.encodeWithReg(encoder, reg);
-}
-
-fn lowerToDEnc(tag: Tag, imm: i32, code: *std.ArrayList(u8)) InnerError!void {
-    const opc = getOpCode(tag, .d, false).?;
-    const encoder = try Encoder.init(code, 6);
-    opc.encode(encoder);
-    encoder.imm32(imm);
-}
-
-fn lowerToMEnc(tag: Tag, reg_or_mem: RegisterOrMemory, code: *std.ArrayList(u8)) InnerError!void {
-    const opc = getOpCode(tag, .m, false).?;
-    const modrm_ext = getModRmExt(tag).?;
-    switch (reg_or_mem) {
-        .register => |reg| {
-            // TODO clean this up!
-            if (reg.size() != 64) {
-                if (reg.size() != 8 and !tag.isSetCC()) return error.EmitFail;
-            }
-            const encoder = try Encoder.init(code, 3);
-            encoder.rex(.{
-                .w = tag.isSetCC(),
-                .b = reg.isExtended(),
-            });
-            opc.encode(encoder);
-            encoder.modRm_direct(modrm_ext, reg.lowId());
-        },
-        .memory => |mem_op| {
-            const encoder = try Encoder.init(code, 8);
-            if (mem_op.reg) |reg| {
-                // TODO clean this up!
-                if (reg.size() != 64) {
-                    if (reg.size() != 8 and !tag.isSetCC()) return error.EmitFail;
-                }
-                encoder.rex(.{
-                    .w = tag.isSetCC(),
-                    .b = reg.isExtended(),
-                });
-                opc.encode(encoder);
-                if (reg.lowId() == 4) {
-                    if (mem_op.disp == 0) {
-                        encoder.modRm_SIBDisp0(modrm_ext);
-                        encoder.sib_base(reg.lowId());
-                    } else if (immOpSize(mem_op.disp) == 8) {
-                        encoder.modRm_SIBDisp8(modrm_ext);
-                        encoder.sib_baseDisp8(reg.lowId());
-                        encoder.disp8(@intCast(i8, mem_op.disp));
-                    } else {
-                        encoder.modRm_SIBDisp32(modrm_ext);
-                        encoder.sib_baseDisp32(reg.lowId());
-                        encoder.disp32(mem_op.disp);
-                    }
-                } else {
-                    if (mem_op.disp == 0) {
-                        encoder.modRm_indirectDisp0(modrm_ext, reg.lowId());
-                    } else if (immOpSize(mem_op.disp) == 8) {
-                        encoder.modRm_indirectDisp8(modrm_ext, reg.lowId());
-                        encoder.disp8(@intCast(i8, mem_op.disp));
-                    } else {
-                        encoder.modRm_indirectDisp32(modrm_ext, reg.lowId());
-                        encoder.disp32(mem_op.disp);
-                    }
-                }
-            } else {
-                opc.encode(encoder);
-                if (mem_op.rip) {
-                    encoder.modRm_RIPDisp32(modrm_ext);
-                } else {
-                    encoder.modRm_SIBDisp0(modrm_ext);
-                    encoder.sib_disp32();
-                }
-                encoder.disp32(mem_op.disp);
-            }
-        },
-    }
-}
-
-fn lowerToTdEnc(tag: Tag, moffs: i64, reg: Register, code: *std.ArrayList(u8)) InnerError!void {
-    return lowerToTdFdEnc(tag, reg, moffs, code, true);
-}
-
-fn lowerToFdEnc(tag: Tag, reg: Register, moffs: i64, code: *std.ArrayList(u8)) InnerError!void {
-    return lowerToTdFdEnc(tag, reg, moffs, code, false);
-}
-
-fn lowerToTdFdEnc(tag: Tag, reg: Register, moffs: i64, code: *std.ArrayList(u8), td: bool) InnerError!void {
-    if (reg.lowId() != Register.rax.lowId()) return error.EmitFail;
-    if (reg.size() != immOpSize(moffs)) return error.EmitFail;
-    const opc = if (td)
-        getOpCode(tag, .td, reg.size() == 8).?
-    else
-        getOpCode(tag, .fd, reg.size() == 8).?;
-    const encoder = try Encoder.init(code, 10);
-    if (reg.size() == 16) {
-        encoder.opcode_1byte(0x66);
-    }
-    encoder.rex(.{
-        .w = reg.size() == 64,
-    });
-    opc.encode(encoder);
-    switch (reg.size()) {
-        8 => {
-            const moffs8 = try math.cast(i8, moffs);
-            encoder.imm8(moffs8);
-        },
-        16 => {
-            const moffs16 = try math.cast(i16, moffs);
-            encoder.imm16(moffs16);
-        },
-        32 => {
-            const moffs32 = try math.cast(i32, moffs);
-            encoder.imm32(moffs32);
-        },
-        64 => {
-            encoder.imm64(@bitCast(u64, moffs));
-        },
-        else => unreachable,
+fn lowerToOEnc(tag: Tag, reg: Register, code: *std.ArrayList(u8)) LoweringError!void {
+    if (reg.size() != 16 and reg.size() != 64) {
+        return error.OperandSizeMismatch; // TODO correct for push/pop, but is it universal?
     }
-}
-
-fn lowerToOiEnc(tag: Tag, reg: Register, imm: i64, code: *std.ArrayList(u8)) InnerError!void {
-    if (reg.size() != immOpSize(imm)) return error.EmitFail;
-    const opc = getOpCode(tag, .oi, reg.size() == 8).?;
-    const encoder = try Encoder.init(code, 10);
+    const opc = getOpCode(tag, .o, false).?;
+    const encoder = try Encoder.init(code, 3);
     if (reg.size() == 16) {
         encoder.opcode_1byte(0x66);
     }
     encoder.rex(.{
-        .w = reg.size() == 64,
+        .w = false,
         .b = reg.isExtended(),
     });
     opc.encodeWithReg(encoder, reg);
-    switch (reg.size()) {
-        8 => {
-            const imm8 = try math.cast(i8, imm);
-            encoder.imm8(imm8);
-        },
-        16 => {
-            const imm16 = try math.cast(i16, imm);
-            encoder.imm16(imm16);
-        },
-        32 => {
-            const imm32 = try math.cast(i32, imm);
-            encoder.imm32(imm32);
-        },
-        64 => {
-            encoder.imm64(@bitCast(u64, imm));
-        },
-        else => unreachable,
-    }
 }
 
-fn lowerToMiEnc(tag: Tag, reg_or_mem: RegisterOrMemory, imm: i32, code: *std.ArrayList(u8)) InnerError!void {
-    const modrm_ext = getModRmExt(tag).?;
-    switch (reg_or_mem) {
-        .register => |dst_reg| {
-            const opc = getOpCode(tag, .mi, dst_reg.size() == 8).?;
-            const encoder = try Encoder.init(code, 7);
-            if (dst_reg.size() == 16) {
-                // 0x66 prefix switches to the non-default size; here we assume a switch from
-                // the default 32bits to 16bits operand-size.
-                // More info: https://www.cs.uni-potsdam.de/desn/lehre/ss15/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf#page=32&zoom=auto,-159,773
-                encoder.opcode_1byte(0x66);
-            }
-            encoder.rex(.{
-                .w = dst_reg.size() == 64,
-                .b = dst_reg.isExtended(),
-            });
-            opc.encode(encoder);
-            encoder.modRm_direct(modrm_ext, dst_reg.lowId());
-            switch (dst_reg.size()) {
-                8 => {
-                    const imm8 = try math.cast(i8, imm);
-                    encoder.imm8(imm8);
-                },
-                16 => {
-                    const imm16 = try math.cast(i16, imm);
-                    encoder.imm16(imm16);
-                },
-                32, 64 => encoder.imm32(imm),
-                else => unreachable,
-            }
-        },
-        .memory => |dst_mem| {
-            const opc = getOpCode(tag, .mi, false).?;
-            const encoder = try Encoder.init(code, 12);
-            if (dst_mem.reg) |dst_reg| {
-                // Register dst_reg can either be 64bit or 32bit in size.
-                // TODO for memory operand, immediate operand pair, we currently
-                // have no way of flagging whether the immediate can be 8-, 16- or
-                // 32-bit and whether the corresponding memory operand is respectively
-                // a byte, word or dword ptr.
-                // TODO we currently don't have a way to flag imm32 64bit sign extended
-                if (dst_reg.size() != 64) return error.EmitFail;
-                encoder.rex(.{
-                    .w = false,
-                    .b = dst_reg.isExtended(),
-                });
-                opc.encode(encoder);
-                if (dst_reg.lowId() == 4) {
-                    if (dst_mem.disp == 0) {
-                        encoder.modRm_SIBDisp0(modrm_ext);
-                        encoder.sib_base(dst_reg.lowId());
-                    } else if (immOpSize(dst_mem.disp) == 8) {
-                        encoder.modRm_SIBDisp8(modrm_ext);
-                        encoder.sib_baseDisp8(dst_reg.lowId());
-                        encoder.disp8(@intCast(i8, dst_mem.disp));
-                    } else {
-                        encoder.modRm_SIBDisp32(modrm_ext);
-                        encoder.sib_baseDisp32(dst_reg.lowId());
-                        encoder.disp32(dst_mem.disp);
-                    }
-                } else {
-                    if (dst_mem.disp == 0) {
-                        encoder.modRm_indirectDisp0(modrm_ext, dst_reg.lowId());
-                    } else if (immOpSize(dst_mem.disp) == 8) {
-                        encoder.modRm_indirectDisp8(modrm_ext, dst_reg.lowId());
-                        encoder.disp8(@intCast(i8, dst_mem.disp));
-                    } else {
-                        encoder.modRm_indirectDisp32(modrm_ext, dst_reg.lowId());
-                        encoder.disp32(dst_mem.disp);
-                    }
-                }
-            } else {
-                opc.encode(encoder);
-                if (dst_mem.rip) {
-                    encoder.modRm_RIPDisp32(modrm_ext);
-                } else {
-                    encoder.modRm_SIBDisp0(modrm_ext);
-                    encoder.sib_disp32();
-                }
-                encoder.disp32(dst_mem.disp);
-            }
-            encoder.imm32(imm);
-        },
-    }
+fn lowerToDEnc(tag: Tag, imm: i32, code: *std.ArrayList(u8)) LoweringError!void { // Lowers `tag` using the D encoding (OP rel32) and appends it to `code`.
+    const opc = getOpCode(tag, .d, false).?; // `.?` asserts that `tag` has a D-encoded opcode
+    const encoder = try Encoder.init(code, 6); // 6 is passed as the size argument; NOTE(review): presumably the max bytes to reserve - confirm
+    opc.encode(encoder);
+    encoder.imm32(imm); // the 32-bit immediate follows the opcode
+}
 
-fn lowerToRmEnc(
-    tag: Tag,
-    reg: Register,
-    reg_or_mem: RegisterOrMemory,
-    code: *std.ArrayList(u8),
-) InnerError!void {
-    const opc = getOpCode(tag, .rm, reg.size() == 8).?;
+fn lowerToMEnc(tag: Tag, reg_or_mem: RegisterOrMemory, code: *std.ArrayList(u8)) LoweringError!void {
+    const opc = getOpCode(tag, .m, false).?;
+    const modrm_ext = getModRmExt(tag).?;
     switch (reg_or_mem) {
-        .register => |src_reg| {
-            if (reg.size() != src_reg.size()) return error.EmitFail;
-            const encoder = try Encoder.init(code, 3);
-            encoder.rex(.{
-                .w = reg.size() == 64,
-                .r = reg.isExtended(),
-                .b = src_reg.isExtended(),
-            });
-            opc.encode(encoder);
-            encoder.modRm_direct(reg.lowId(), src_reg.lowId());
-        },
-        .memory => |src_mem| {
-            const encoder = try Encoder.init(code, 9);
-            if (reg.size() == 16) {
-                encoder.opcode_1byte(0x66);
-            }
-            if (src_mem.reg) |src_reg| {
-                // TODO handle 32-bit base register - requires prefix 0x67
-                // Intel Manual, Vol 1, chapter 3.6 and 3.6.1
-                if (src_reg.size() != 64) return error.EmitFail;
-                encoder.rex(.{
-                    .w = reg.size() == 64,
-                    .r = reg.isExtended(),
-                    .b = src_reg.isExtended(),
-                });
-                opc.encode(encoder);
-                if (src_reg.lowId() == 4) {
-                    if (src_mem.disp == 0) {
-                        encoder.modRm_SIBDisp0(reg.lowId());
-                        encoder.sib_base(src_reg.lowId());
-                    } else if (immOpSize(src_mem.disp) == 8) {
-                        encoder.modRm_SIBDisp8(reg.lowId());
-                        encoder.sib_baseDisp8(src_reg.lowId());
-                        encoder.disp8(@intCast(i8, src_mem.disp));
-                    } else {
-                        encoder.modRm_SIBDisp32(reg.lowId());
-                        encoder.sib_baseDisp32(src_reg.lowId());
-                        encoder.disp32(src_mem.disp);
-                    }
-                } else {
-                    if (src_mem.disp == 0) {
-                        encoder.modRm_indirectDisp0(reg.lowId(), src_reg.lowId());
-                    } else if (immOpSize(src_mem.disp) == 8) {
-                        encoder.modRm_indirectDisp8(reg.lowId(), src_reg.lowId());
-                        encoder.disp8(@intCast(i8, src_mem.disp));
-                    } else {
-                        encoder.modRm_indirectDisp32(reg.lowId(), src_reg.lowId());
-                        encoder.disp32(src_mem.disp);
-                    }
-                }
-            } else {
-                encoder.rex(.{
-                    .w = reg.size() == 64,
-                    .r = reg.isExtended(),
-                });
-                opc.encode(encoder);
-                if (src_mem.rip) {
-                    encoder.modRm_RIPDisp32(reg.lowId());
-                } else {
-                    encoder.modRm_SIBDisp0(reg.lowId());
-                    encoder.sib_disp32();
-                }
-                encoder.disp32(src_mem.disp);
+        .register => |reg| {
+            const op_size_mismatch = blk: {
+                if (tag.isSetCC() and reg.size() == 8)
+                    break :blk false;
+                break :blk reg.size() != 64 and reg.size() != 16;
+            };
+            if (op_size_mismatch) {
+                return error.OperandSizeMismatch;
             }
-        },
-    }
-}
-
-fn lowerToMrEnc(
-    tag: Tag,
-    reg_or_mem: RegisterOrMemory,
-    reg: Register,
-    code: *std.ArrayList(u8),
-) InnerError!void {
-    // We use size of source register reg to work out which
-    // variant of memory ptr to pick:
-    // * reg is 64bit - qword ptr
-    // * reg is 32bit - dword ptr
-    // * reg is 16bit - word ptr
-    // * reg is 8bit - byte ptr
-    const opc = getOpCode(tag, .mr, reg.size() == 8).?;
-    switch (reg_or_mem) {
-        .register => |dst_reg| {
-            if (dst_reg.size() != reg.size()) return error.EmitFail;
-            const encoder = try Encoder.init(code, 3);
-            encoder.rex(.{
-                .w = dst_reg.size() == 64,
-                .r = reg.isExtended(),
-                .b = dst_reg.isExtended(),
-            });
-            opc.encode(encoder);
-            encoder.modRm_direct(reg.lowId(), dst_reg.lowId());
-        },
-        .memory => |dst_mem| {
-            const encoder = try Encoder.init(code, 9);
+            const encoder = try Encoder.init(code, 4);
             if (reg.size() == 16) {
                 encoder.opcode_1byte(0x66);
             }
-            if (dst_mem.reg) |dst_reg| {
-                if (dst_reg.size() != 64) return error.EmitFail;
-                encoder.rex(.{
-                    .w = reg.size() == 64,
-                    .r = reg.isExtended(),
-                    .b = dst_reg.isExtended(),
-                });
-                opc.encode(encoder);
-                if (dst_reg.lowId() == 4) {
-                    if (dst_mem.disp == 0) {
-                        encoder.modRm_SIBDisp0(reg.lowId());
-                        encoder.sib_base(dst_reg.lowId());
-                    } else if (immOpSize(dst_mem.disp) == 8) {
-                        encoder.modRm_SIBDisp8(reg.lowId());
-                        encoder.sib_baseDisp8(dst_reg.lowId());
-                        encoder.disp8(@intCast(i8, dst_mem.disp));
-                    } else {
-                        encoder.modRm_SIBDisp32(reg.lowId());
-                        encoder.sib_baseDisp32(dst_reg.lowId());
-                        encoder.disp32(dst_mem.disp);
-                    }
-                } else {
-                    if (dst_mem.disp == 0) {
-                        encoder.modRm_indirectDisp0(reg.lowId(), dst_reg.lowId());
-                    } else if (immOpSize(dst_mem.disp) == 8) {
-                        encoder.modRm_indirectDisp8(reg.lowId(), dst_reg.lowId());
-                        encoder.disp8(@intCast(i8, dst_mem.disp));
-                    } else {
-                        encoder.modRm_indirectDisp32(reg.lowId(), dst_reg.lowId());
-                        encoder.disp32(dst_mem.disp);
-                    }
-                }
-            } else {
-                encoder.rex(.{
-                    .w = reg.size() == 64,
-                    .r = reg.isExtended(),
-                });
-                opc.encode(encoder);
-                if (dst_mem.rip) {
-                    encoder.modRm_RIPDisp32(reg.lowId());
-                } else {
-                    encoder.modRm_SIBDisp0(reg.lowId());
-                    encoder.sib_disp32();
-                }
-                encoder.disp32(dst_mem.disp);
-            }
-        },
-    }
-}
-
-fn lowerToRmiEnc(
-    tag: Tag,
-    reg: Register,
-    reg_or_mem: RegisterOrMemory,
-    imm: i32,
-    code: *std.ArrayList(u8),
-) InnerError!void {
-    const opc = getOpCode(tag, .rmi, reg.size() == 8).?;
-    switch (reg_or_mem) {
-        .register => |src_reg| {
-            if (reg.size() != src_reg.size()) return error.EmitFail;
-            const encoder = try Encoder.init(code, 7);
             encoder.rex(.{
-                .w = reg.size() == 64,
-                .r = reg.isExtended(),
-                .b = src_reg.isExtended(),
+                .w = tag.isSetCC(),
+                .b = reg.isExtended(),
             });
             opc.encode(encoder);
-            encoder.modRm_direct(reg.lowId(), src_reg.lowId());
-            switch (reg.size()) {
-                8 => {
-                    const imm8 = try math.cast(i8, imm);
-                    encoder.imm8(imm8);
-                },
-                16 => {
-                    const imm16 = try math.cast(i16, imm);
-                    encoder.imm16(imm16);
-                },
-                32, 64 => encoder.imm32(imm),
-                else => unreachable,
-            }
+            encoder.modRm_direct(modrm_ext, reg.lowId());
         },
-        .memory => |src_mem| {
-            const encoder = try Encoder.init(code, 13);
-            if (reg.size() == 16) {
+        .memory => |mem_op| {
+            if (mem_op.ptr_size != .qword_ptr and mem_op.ptr_size != .word_ptr) {
+                return error.OperandSizeMismatch;
+            }
+            const encoder = try Encoder.init(code, 8);
+            if (mem_op.ptr_size == .word_ptr) {
                 encoder.opcode_1byte(0x66);
             }
-            if (src_mem.reg) |src_reg| {
-                // TODO handle 32-bit base register - requires prefix 0x67
-                // Intel Manual, Vol 1, chapter 3.6 and 3.6.1
-                if (src_reg.size() != 64) return error.EmitFail;
+            if (mem_op.reg) |reg| {
+                if (reg.size() != 64) {
+                    return error.OperandSizeMismatch;
+                }
                 encoder.rex(.{
-                    .w = reg.size() == 64,
-                    .r = reg.isExtended(),
-                    .b = src_reg.isExtended(),
+                    .w = tag.isSetCC(),
+                    .b = reg.isExtended(),
                 });
                 opc.encode(encoder);
-                if (src_reg.lowId() == 4) {
-                    if (src_mem.disp == 0) {
-                        encoder.modRm_SIBDisp0(reg.lowId());
-                        encoder.sib_base(src_reg.lowId());
-                    } else if (immOpSize(src_mem.disp) == 8) {
-                        encoder.modRm_SIBDisp8(reg.lowId());
-                        encoder.sib_baseDisp8(src_reg.lowId());
-                        encoder.disp8(@intCast(i8, src_mem.disp));
+                if (reg.lowId() == 4) {
+                    if (mem_op.disp == 0) {
+                        encoder.modRm_SIBDisp0(modrm_ext);
+                        encoder.sib_base(reg.lowId());
+                    } else if (immOpSize(mem_op.disp) == 8) {
+                        encoder.modRm_SIBDisp8(modrm_ext);
+                        encoder.sib_baseDisp8(reg.lowId());
+                        encoder.disp8(@intCast(i8, mem_op.disp));
                     } else {
-                        encoder.modRm_SIBDisp32(reg.lowId());
-                        encoder.sib_baseDisp32(src_reg.lowId());
-                        encoder.disp32(src_mem.disp);
+                        encoder.modRm_SIBDisp32(modrm_ext);
+                        encoder.sib_baseDisp32(reg.lowId());
+                        encoder.disp32(mem_op.disp);
                     }
                 } else {
-                    if (src_mem.disp == 0) {
-                        encoder.modRm_indirectDisp0(reg.lowId(), src_reg.lowId());
-                    } else if (immOpSize(src_mem.disp) == 8) {
-                        encoder.modRm_indirectDisp8(reg.lowId(), src_reg.lowId());
-                        encoder.disp8(@intCast(i8, src_mem.disp));
+                    if (mem_op.disp == 0) {
+                        encoder.modRm_indirectDisp0(modrm_ext, reg.lowId());
+                    } else if (immOpSize(mem_op.disp) == 8) {
+                        encoder.modRm_indirectDisp8(modrm_ext, reg.lowId());
+                        encoder.disp8(@intCast(i8, mem_op.disp));
                     } else {
-                        encoder.modRm_indirectDisp32(reg.lowId(), src_reg.lowId());
-                        encoder.disp32(src_mem.disp);
+                        encoder.modRm_indirectDisp32(modrm_ext, reg.lowId());
+                        encoder.disp32(mem_op.disp);
                     }
                 }
             } else {
-                encoder.rex(.{
-                    .w = reg.size() == 64,
-                    .r = reg.isExtended(),
-                });
                 opc.encode(encoder);
-                if (src_mem.rip) {
-                    encoder.modRm_RIPDisp32(reg.lowId());
+                if (mem_op.rip) {
+                    encoder.modRm_RIPDisp32(modrm_ext);
                 } else {
-                    encoder.modRm_SIBDisp0(reg.lowId());
+                    encoder.modRm_SIBDisp0(modrm_ext);
                     encoder.sib_disp32();
                 }
-                encoder.disp32(src_mem.disp);
+                encoder.disp32(mem_op.disp);
             }
-            encoder.imm32(imm);
         },
     }
 }
 
-fn mirArith(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
-    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
-    switch (ops.flags) {
-        0b00 => {
-            if (ops.reg2 == .none) {
-                // mov reg1, imm32
-                // MI
-                const imm = emit.mir.instructions.items(.data)[inst].imm;
-                return lowerToMiEnc(tag, RegisterOrMemory.reg(ops.reg1), imm, emit.code);
-            }
-            // mov reg1, reg2
-            // RM
-            return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code);
-        },
-        0b01 => {
-            const imm = emit.mir.instructions.items(.data)[inst].imm;
-            if (ops.reg2 == .none) {
-                // mov reg1, [imm32]
-                // RM
-                return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.mem(null, imm), emit.code);
-            }
-            // mov reg1, [reg2 + imm32]
-            // RM
-            return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.mem(ops.reg2, imm), emit.code);
-        },
-        0b10 => {
-            if (ops.reg2 == .none) {
-                // mov dword ptr [reg1 + 0], imm32
-                // MI
-                const imm = emit.mir.instructions.items(.data)[inst].imm;
-                return lowerToMiEnc(tag, RegisterOrMemory.mem(ops.reg1, 0), imm, emit.code);
-            }
-            // mov [reg1 + imm32], reg2
-            // MR
-            const imm = emit.mir.instructions.items(.data)[inst].imm;
-            return lowerToMrEnc(tag, RegisterOrMemory.mem(ops.reg1, imm), ops.reg2, emit.code);
-        },
-        0b11 => {
-            if (ops.reg2 == .none) {
-                // mov dword ptr [reg1 + imm32], imm32
-                // MI
-                const payload = emit.mir.instructions.items(.data)[inst].payload;
-                const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data;
-                return lowerToMiEnc(
-                    tag,
-                    RegisterOrMemory.mem(ops.reg1, imm_pair.dest_off),
-                    imm_pair.operand,
-                    emit.code,
-                );
-            }
-            return emit.fail("TODO unused variant: mov reg1, reg2, 0b11", .{});
-        },
-    }
+/// Lowers `tag` with the TD encoding by forwarding to `lowerToTdFdEnc` with `td = true`.
+/// Note the parameter order: `moffs` precedes `reg` here, whereas the shared helper takes
+/// `reg` first. All errors of the helper are returned unchanged.
+fn lowerToTdEnc(tag: Tag, moffs: i64, reg: Register, code: *std.ArrayList(u8)) LoweringError!void {
+    return lowerToTdFdEnc(tag, reg, moffs, code, true);
 }
 
-fn immOpSize(imm: i64) u8 {
-    blk: {
-        _ = math.cast(i8, imm) catch break :blk;
-        return 8;
-    }
-    blk: {
-        _ = math.cast(i16, imm) catch break :blk;
-        return 16;
-    }
-    blk: {
-        _ = math.cast(i32, imm) catch break :blk;
-        return 32;
-    }
-    return 64;
+/// Lowers `tag` with the FD encoding by forwarding to `lowerToTdFdEnc` with `td = false`.
+/// All errors of the helper are returned unchanged.
+fn lowerToFdEnc(tag: Tag, reg: Register, moffs: i64, code: *std.ArrayList(u8)) LoweringError!void {
+    return lowerToTdFdEnc(tag, reg, moffs, code, false);
 }
 
-// TODO
-fn mirArithScaleSrc(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
-    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
-    const scale = ops.flags;
-    // OP reg1, [reg2 + scale*rcx + imm32]
-    const opc = getOpCode(tag, .rm, ops.reg1.size() == 8).?;
-    const imm = emit.mir.instructions.items(.data)[inst].imm;
-    const encoder = try Encoder.init(emit.code, 8);
-    encoder.rex(.{
-        .w = ops.reg1.size() == 64,
-        .r = ops.reg1.isExtended(),
-        .b = ops.reg2.isExtended(),
-    });
-    opc.encode(encoder);
-    if (imm <= math.maxInt(i8)) {
-        encoder.modRm_SIBDisp8(ops.reg1.lowId());
-        encoder.sib_scaleIndexBaseDisp8(scale, Register.rcx.lowId(), ops.reg2.lowId());
-        encoder.disp8(@intCast(i8, imm));
-    } else {
-        encoder.modRm_SIBDisp32(ops.reg1.lowId());
-        encoder.sib_scaleIndexBaseDisp32(scale, Register.rcx.lowId(), ops.reg2.lowId());
-        encoder.disp32(imm);
+/// Shared lowering for the TD and FD encodings; `td` selects which opcode table entry
+/// (`.td` when true, `.fd` otherwise) is looked up for `tag`.
+///
+/// Emits, in order: a 0x66 prefix for a 16-bit `reg`, REX.W for a 64-bit `reg`, the opcode,
+/// and `moffs` written as an immediate whose width equals `reg.size()`.
+///
+/// Errors:
+/// * `error.RaxOperandExpected` - `reg` does not share rax's low id, or is an extended register.
+/// * `error.OperandSizeMismatch` - `reg.size()` differs from `immOpSize(moffs)`.
+/// * any error from `Encoder.init` or `math.cast` is propagated.
+fn lowerToTdFdEnc(tag: Tag, reg: Register, moffs: i64, code: *std.ArrayList(u8), td: bool) LoweringError!void {
+    // Comparing low ids alone is not enough: an extended register (e.g. r8) presumably shares
+    // rax's low id, and since no REX.B is emitted below it would be encoded as rax.
+    if (reg.lowId() != Register.rax.lowId() or reg.isExtended()) {
+        return error.RaxOperandExpected;
     }
-}
-
-// TODO
-fn mirArithScaleDst(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
-    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
-    const scale = ops.flags;
-    const imm = emit.mir.instructions.items(.data)[inst].imm;
-
-    if (ops.reg2 == .none) {
-        // OP [reg1 + scale*rax + 0], imm32
-        const opc = getOpCode(tag, .mi, ops.reg1.size() == 8).?;
-        const modrm_ext = getModRmExt(tag).?;
-        const encoder = try Encoder.init(emit.code, 8);
-        encoder.rex(.{
-            .w = ops.reg1.size() == 64,
-            .b = ops.reg1.isExtended(),
-        });
-        opc.encode(encoder);
-        encoder.modRm_SIBDisp0(modrm_ext);
-        encoder.sib_scaleIndexBase(scale, Register.rax.lowId(), ops.reg1.lowId());
-        if (imm <= math.maxInt(i8)) {
-            encoder.imm8(@intCast(i8, imm));
-        } else if (imm <= math.maxInt(i16)) {
-            encoder.imm16(@intCast(i16, imm));
-        } else {
-            encoder.imm32(imm);
-        }
-        return;
+    // The offset is emitted with the same width as the register, so the two must agree.
+    if (reg.size() != immOpSize(moffs)) {
+        return error.OperandSizeMismatch;
     }
-
-    // OP [reg1 + scale*rax + imm32], reg2
-    const opc = getOpCode(tag, .mr, ops.reg1.size() == 8).?;
-    const encoder = try Encoder.init(emit.code, 8);
-    encoder.rex(.{
-        .w = ops.reg1.size() == 64,
-        .r = ops.reg2.isExtended(),
-        .b = ops.reg1.isExtended(),
-    });
-    opc.encode(encoder);
-    if (imm <= math.maxInt(i8)) {
-        encoder.modRm_SIBDisp8(ops.reg2.lowId());
-        encoder.sib_scaleIndexBaseDisp8(scale, Register.rax.lowId(), ops.reg1.lowId());
-        encoder.disp8(@intCast(i8, imm));
-    } else {
-        encoder.modRm_SIBDisp32(ops.reg2.lowId());
-        encoder.sib_scaleIndexBaseDisp32(scale, Register.rax.lowId(), ops.reg1.lowId());
-        encoder.disp32(imm);
+    const opc = if (td)
+        getOpCode(tag, .td, reg.size() == 8).?
+    else
+        getOpCode(tag, .fd, reg.size() == 8).?;
+    const encoder = try Encoder.init(code, 10);
+    // 0x66 switches the operand size to 16 bits.
+    if (reg.size() == 16) {
+        encoder.opcode_1byte(0x66);
     }
-}
-
-// TODO
-fn mirArithScaleImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
-    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
-    const scale = ops.flags;
-    const payload = emit.mir.instructions.items(.data)[inst].payload;
-    const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data;
-    const opc = getOpCode(tag, .mi, ops.reg1.size() == 8).?;
-    const modrm_ext = getModRmExt(tag).?;
-    const encoder = try Encoder.init(emit.code, 2);
     encoder.rex(.{
-        .w = ops.reg1.size() == 64,
-        .b = ops.reg1.isExtended(),
+        .w = reg.size() == 64,
     });
     opc.encode(encoder);
-    if (imm_pair.dest_off <= math.maxInt(i8)) {
-        encoder.modRm_SIBDisp8(modrm_ext);
-        encoder.sib_scaleIndexBaseDisp8(scale, Register.rax.lowId(), ops.reg1.lowId());
-        encoder.disp8(@intCast(i8, imm_pair.dest_off));
-    } else {
-        encoder.modRm_SIBDisp32(modrm_ext);
-        encoder.sib_scaleIndexBaseDisp32(scale, Register.rax.lowId(), ops.reg1.lowId());
-        encoder.disp32(imm_pair.dest_off);
-    }
-    encoder.imm32(imm_pair.operand);
-}
-
-fn mirMovabs(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
-    const tag = emit.mir.instructions.items(.tag)[inst];
-    assert(tag == .movabs);
-    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
-    const imm: i64 = if (ops.reg1.size() == 64) blk: {
-        const payload = emit.mir.instructions.items(.data)[inst].payload;
-        const imm = emit.mir.extraData(Mir.Imm64, payload).data;
-        break :blk @bitCast(i64, imm.decode());
-    } else emit.mir.instructions.items(.data)[inst].imm;
-    if (ops.flags == 0b00) {
-        // movabs reg, imm64
-        // OI
-        return lowerToOiEnc(.mov, ops.reg1, imm, emit.code);
-    }
-    if (ops.reg1 == .none) {
-        // movabs moffs64, rax
-        // TD
-        return lowerToTdEnc(.mov, imm, ops.reg2, emit.code);
-    } else {
-        // movabs rax, moffs64
-        // FD
-        return lowerToFdEnc(.mov, ops.reg1, imm, emit.code);
+    // Write the memory offset using the register's width.
+    switch (reg.size()) {
+        8 => {
+            const moffs8 = try math.cast(i8, moffs);
+            encoder.imm8(moffs8);
+        },
+        16 => {
+            const moffs16 = try math.cast(i16, moffs);
+            encoder.imm16(moffs16);
+        },
+        32 => {
+            const moffs32 = try math.cast(i32, moffs);
+            encoder.imm32(moffs32);
+        },
+        64 => {
+            encoder.imm64(@bitCast(u64, moffs));
+        },
+        else => unreachable,
     }
 }
 
-fn mirIMulComplex(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
-    const tag = emit.mir.instructions.items(.tag)[inst];
-    assert(tag == .imul_complex);
-    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
-    switch (ops.flags) {
-        0b00 => return lowerToRmEnc(.imul, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code),
-        0b10 => {
-            const imm = emit.mir.instructions.items(.data)[inst].imm;
-            return lowerToRmiEnc(.imul, ops.reg1, RegisterOrMemory.reg(ops.reg2), imm, emit.code);
+/// Lowers `tag` using the OI encoding: the destination register is folded into the opcode
+/// (via `encodeWithReg`) and followed by an immediate as wide as the register.
+///
+/// Emits, in order: a 0x66 prefix for a 16-bit `reg`, REX (W for a 64-bit `reg`, B for an
+/// extended `reg`), the opcode, and the immediate.
+///
+/// Errors:
+/// * `error.OperandSizeMismatch` - `reg.size()` differs from `immOpSize(imm)`.
+/// * any error from `Encoder.init` or `math.cast` is propagated.
+fn lowerToOiEnc(tag: Tag, reg: Register, imm: i64, code: *std.ArrayList(u8)) LoweringError!void {
+    // The immediate is written with the register's width, so the two must agree exactly.
+    if (reg.size() != immOpSize(imm)) {
+        return error.OperandSizeMismatch;
+    }
+    const opc = getOpCode(tag, .oi, reg.size() == 8).?;
+    const encoder = try Encoder.init(code, 10);
+    // 0x66 switches the operand size to 16 bits.
+    if (reg.size() == 16) {
+        encoder.opcode_1byte(0x66);
+    }
+    encoder.rex(.{
+        .w = reg.size() == 64,
+        .b = reg.isExtended(),
+    });
+    opc.encodeWithReg(encoder, reg);
+    switch (reg.size()) {
+        8 => {
+            const imm8 = try math.cast(i8, imm);
+            encoder.imm8(imm8);
         },
-        else => return emit.fail("TODO implement imul", .{}),
+        16 => {
+            const imm16 = try math.cast(i16, imm);
+            encoder.imm16(imm16);
+        },
+        32 => {
+            const imm32 = try math.cast(i32, imm);
+            encoder.imm32(imm32);
+        },
+        64 => {
+            encoder.imm64(@bitCast(u64, imm));
+        },
+        else => unreachable,
     }
 }
 
-fn mirLea(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
-    const tag = emit.mir.instructions.items(.tag)[inst];
-    assert(tag == .lea);
-    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
-    assert(ops.flags == 0b01);
-    const imm = emit.mir.instructions.items(.data)[inst].imm;
-    return lowerToRmEnc(.lea, ops.reg1, RegisterOrMemory.mem(ops.reg2, imm), emit.code);
-}
-
-fn mirLeaRip(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
-    const tag = emit.mir.instructions.items(.tag)[inst];
-    assert(tag == .lea_rip);
-    const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
-    const start_offset = emit.code.items.len;
-    try lowerToRmEnc(.lea, ops.reg1, RegisterOrMemory.rip(0), emit.code);
-    const end_offset = emit.code.items.len;
-    if (@truncate(u1, ops.flags) == 0b0) {
-        // Backpatch the displacement
-        // TODO figure out if this can be simplified
-        const payload = emit.mir.instructions.items(.data)[inst].payload;
-        const imm = emit.mir.extraData(Mir.Imm64, payload).data.decode();
-        const disp = @intCast(i32, @intCast(i64, imm) - @intCast(i64, end_offset - start_offset));
-        mem.writeIntLittle(i32, emit.code.items[end_offset - 4 ..][0..4], disp);
-    } else {
-        const got_entry = emit.mir.instructions.items(.data)[inst].got_entry;
-        if (emit.bin_file.cast(link.File.MachO)) |macho_file| {
-            // TODO I think the reloc might be in the wrong place.
-            const decl = macho_file.active_decl.?;
-            try decl.link.macho.relocs.append(emit.bin_file.allocator, .{
-                .offset = @intCast(u32, end_offset - 4),
-                .target = .{ .local = got_entry },
-                .addend = 0,
-                .subtractor = null,
-                .pcrel = true,
-                .length = 2,
-                .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_GOT),
+/// Lowers `tag` using the MI encoding: the destination `reg_or_mem` goes into ModRM.rm, the
+/// ModRM.reg field carries the opcode extension from `getModRmExt`, and `imm` follows.
+///
+/// The immediate width follows the destination size: 8 and 16 bits are narrowed with
+/// `math.cast`; 32- and 64-bit destinations both take a 32-bit immediate.
+///
+/// Errors:
+/// * `error.OperandSizeMismatch` - a memory base register is not 64 bits wide.
+/// * any error from `Encoder.init` or `math.cast` is propagated.
+fn lowerToMiEnc(tag: Tag, reg_or_mem: RegisterOrMemory, imm: i32, code: *std.ArrayList(u8)) LoweringError!void {
+    const modrm_ext = getModRmExt(tag).?;
+    switch (reg_or_mem) {
+        .register => |dst_reg| {
+            const opc = getOpCode(tag, .mi, dst_reg.size() == 8).?;
+            const encoder = try Encoder.init(code, 7);
+            if (dst_reg.size() == 16) {
+                // 0x66 prefix switches to the non-default size; here we assume a switch from
+                // the default 32bits to 16bits operand-size.
+                // More info: https://www.cs.uni-potsdam.de/desn/lehre/ss15/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf#page=32&zoom=auto,-159,773
+                encoder.opcode_1byte(0x66);
+            }
+            encoder.rex(.{
+                .w = dst_reg.size() == 64,
+                .b = dst_reg.isExtended(),
             });
-        } else {
-            return emit.fail("TODO implement lea_rip for linking backends different than MachO", .{});
-        }
-    }
-}
-
-fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
-    const tag = emit.mir.instructions.items(.tag)[inst];
-    assert(tag == .call_extern);
-    const n_strx = emit.mir.instructions.items(.data)[inst].extern_fn;
-    const offset = blk: {
-        // callq
-        try lowerToDEnc(.call_near, 0, emit.code);
-        break :blk @intCast(u32, emit.code.items.len) - 4;
-    };
-    if (emit.bin_file.cast(link.File.MachO)) |macho_file| {
-        // Add relocation to the decl.
-        try macho_file.active_decl.?.link.macho.relocs.append(emit.bin_file.allocator, .{
-            .offset = offset,
-            .target = .{ .global = n_strx },
-            .addend = 0,
-            .subtractor = null,
-            .pcrel = true,
-            .length = 2,
-            .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_BRANCH),
-        });
-    } else {
-        return emit.fail("TODO implement call_extern for linking backends different than MachO", .{});
+            opc.encode(encoder);
+            encoder.modRm_direct(modrm_ext, dst_reg.lowId());
+            switch (dst_reg.size()) {
+                8 => {
+                    const imm8 = try math.cast(i8, imm);
+                    encoder.imm8(imm8);
+                },
+                16 => {
+                    const imm16 = try math.cast(i16, imm);
+                    encoder.imm16(imm16);
+                },
+                32, 64 => encoder.imm32(imm),
+                else => unreachable,
+            }
+        },
+        .memory => |dst_mem| {
+            const opc = getOpCode(tag, .mi, dst_mem.ptr_size == .byte_ptr).?;
+            const encoder = try Encoder.init(code, 12);
+            if (dst_mem.ptr_size == .word_ptr) {
+                encoder.opcode_1byte(0x66);
+            }
+            if (dst_mem.reg) |dst_reg| {
+                if (dst_reg.size() != 64) {
+                    return error.OperandSizeMismatch;
+                }
+                encoder.rex(.{
+                    .w = dst_mem.ptr_size == .qword_ptr,
+                    .b = dst_reg.isExtended(),
+                });
+                opc.encode(encoder);
+                // A base register with low id 4 (rsp/r12) can only be addressed through a SIB byte.
+                if (dst_reg.lowId() == 4) {
+                    if (dst_mem.disp == 0) {
+                        encoder.modRm_SIBDisp0(modrm_ext);
+                        encoder.sib_base(dst_reg.lowId());
+                    } else if (immOpSize(dst_mem.disp) == 8) {
+                        encoder.modRm_SIBDisp8(modrm_ext);
+                        encoder.sib_baseDisp8(dst_reg.lowId());
+                        encoder.disp8(@intCast(i8, dst_mem.disp));
+                    } else {
+                        encoder.modRm_SIBDisp32(modrm_ext);
+                        encoder.sib_baseDisp32(dst_reg.lowId());
+                        encoder.disp32(dst_mem.disp);
+                    }
+                } else {
+                    // mod=00 with rm=0b101 selects RIP-relative/disp32 addressing, so a base
+                    // with low id 5 (rbp/r13) must use the disp8 form even for a zero offset.
+                    if (dst_mem.disp == 0 and dst_reg.lowId() != 5) {
+                        encoder.modRm_indirectDisp0(modrm_ext, dst_reg.lowId());
+                    } else if (immOpSize(dst_mem.disp) == 8) {
+                        encoder.modRm_indirectDisp8(modrm_ext, dst_reg.lowId());
+                        encoder.disp8(@intCast(i8, dst_mem.disp));
+                    } else {
+                        encoder.modRm_indirectDisp32(modrm_ext, dst_reg.lowId());
+                        encoder.disp32(dst_mem.disp);
+                    }
+                }
+            } else {
+                // No base register, so REX.B is not needed, but REX.W is still required for a
+                // qword-sized destination; without it the store would be encoded as a dword.
+                encoder.rex(.{
+                    .w = dst_mem.ptr_size == .qword_ptr,
+                });
+                opc.encode(encoder);
+                if (dst_mem.rip) {
+                    encoder.modRm_RIPDisp32(modrm_ext);
+                } else {
+                    encoder.modRm_SIBDisp0(modrm_ext);
+                    encoder.sib_disp32();
+                }
+                encoder.disp32(dst_mem.disp);
+            }
+            switch (dst_mem.ptr_size) {
+                .byte_ptr => {
+                    const imm8 = try math.cast(i8, imm);
+                    encoder.imm8(imm8);
+                },
+                .word_ptr => {
+                    const imm16 = try math.cast(i16, imm);
+                    encoder.imm16(imm16);
+                },
+                .dword_ptr, .qword_ptr => {
+                    encoder.imm32(imm);
+                },
+            }
+        },
     }
 }
 
-fn mirDbgLine(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
-    const tag = emit.mir.instructions.items(.tag)[inst];
-    assert(tag == .dbg_line);
-    const payload = emit.mir.instructions.items(.data)[inst].payload;
-    const dbg_line_column = emit.mir.extraData(Mir.DbgLineColumn, payload).data;
-    try emit.dbgAdvancePCAndLine(dbg_line_column.line, dbg_line_column.column);
-}
-
-fn dbgAdvancePCAndLine(emit: *Emit, line: u32, column: u32) InnerError!void {
-    const delta_line = @intCast(i32, line) - @intCast(i32, emit.prev_di_line);
-    const delta_pc: usize = emit.code.items.len - emit.prev_di_pc;
-    switch (emit.debug_output) {
-        .dwarf => |dbg_out| {
-            // TODO Look into using the DWARF special opcodes to compress this data.
-            // It lets you emit single-byte opcodes that add different numbers to
-            // both the PC and the line number at the same time.
-            try dbg_out.dbg_line.ensureUnusedCapacity(11);
-            dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.advance_pc);
-            leb128.writeULEB128(dbg_out.dbg_line.writer(), delta_pc) catch unreachable;
-            if (delta_line != 0) {
-                dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.advance_line);
-                leb128.writeILEB128(dbg_out.dbg_line.writer(), delta_line) catch unreachable;
+/// Lowers `tag` using the RM encoding: `reg` goes into ModRM.reg and `reg_or_mem` into
+/// ModRM.rm (plus SIB / displacement bytes for memory operands).
+///
+/// A 0x66 operand-size prefix is emitted for 16-bit operands in both the register and the
+/// memory form.
+///
+/// Errors:
+/// * `error.OperandSizeMismatch` - the two operands differ in size, or a memory base register
+///   is not 64 bits wide.
+/// * any error from `Encoder.init` is propagated.
+fn lowerToRmEnc(
+    tag: Tag,
+    reg: Register,
+    reg_or_mem: RegisterOrMemory,
+    code: *std.ArrayList(u8),
+) LoweringError!void {
+    const opc = getOpCode(tag, .rm, reg.size() == 8).?;
+    switch (reg_or_mem) {
+        .register => |src_reg| {
+            if (reg.size() != src_reg.size()) {
+                return error.OperandSizeMismatch;
             }
-            dbg_out.dbg_line.appendAssumeCapacity(DW.LNS.copy);
-            emit.prev_di_pc = emit.code.items.len;
-            emit.prev_di_line = line;
-            emit.prev_di_column = column;
-            emit.prev_di_pc = emit.code.items.len;
+            // One byte more than before to leave room for the operand-size prefix.
+            const encoder = try Encoder.init(code, 4);
+            // Without the 0x66 prefix a 16-bit register pair would be encoded as its 32-bit alias.
+            if (reg.size() == 16) {
+                encoder.opcode_1byte(0x66);
+            }
+            encoder.rex(.{
+                .w = reg.size() == 64,
+                .r = reg.isExtended(),
+                .b = src_reg.isExtended(),
+            });
+            opc.encode(encoder);
+            encoder.modRm_direct(reg.lowId(), src_reg.lowId());
         },
-        .plan9 => |dbg_out| {
-            if (delta_pc <= 0) return; // only do this when the pc changes
-            // we have already checked the target in the linker to make sure it is compatable
-            const quant = @import("../../link/Plan9/aout.zig").getPCQuant(emit.target.cpu.arch) catch unreachable;
-
-            // increasing the line number
-            try @import("../../link/Plan9.zig").changeLine(dbg_out.dbg_line, delta_line);
-            // increasing the pc
-            const d_pc_p9 = @intCast(i64, delta_pc) - quant;
-            if (d_pc_p9 > 0) {
-                // minus one because if its the last one, we want to leave space to change the line which is one quanta
-                try dbg_out.dbg_line.append(@intCast(u8, @divExact(d_pc_p9, quant) + 128) - quant);
-                if (dbg_out.pcop_change_index.*) |pci|
-                    dbg_out.dbg_line.items[pci] += 1;
-                dbg_out.pcop_change_index.* = @intCast(u32, dbg_out.dbg_line.items.len - 1);
-            } else if (d_pc_p9 == 0) {
-                // we don't need to do anything, because adding the quant does it for us
-            } else unreachable;
-            if (dbg_out.start_line.* == null)
-                dbg_out.start_line.* = emit.prev_di_line;
-            dbg_out.end_line.* = line;
-            // only do this if the pc changed
-            emit.prev_di_line = line;
-            emit.prev_di_column = column;
-            emit.prev_di_pc = emit.code.items.len;
+        .memory => |src_mem| {
+            if (reg.size() != src_mem.ptr_size.size()) {
+                return error.OperandSizeMismatch;
+            }
+            const encoder = try Encoder.init(code, 9);
+            if (reg.size() == 16) {
+                encoder.opcode_1byte(0x66);
+            }
+            if (src_mem.reg) |src_reg| {
+                // TODO handle 32-bit base register - requires prefix 0x67
+                // Intel Manual, Vol 1, chapter 3.6 and 3.6.1
+                if (src_reg.size() != 64) {
+                    return error.OperandSizeMismatch;
+                }
+                encoder.rex(.{
+                    .w = reg.size() == 64,
+                    .r = reg.isExtended(),
+                    .b = src_reg.isExtended(),
+                });
+                opc.encode(encoder);
+                // A base register with low id 4 (rsp/r12) can only be addressed through a SIB byte.
+                if (src_reg.lowId() == 4) {
+                    if (src_mem.disp == 0) {
+                        encoder.modRm_SIBDisp0(reg.lowId());
+                        encoder.sib_base(src_reg.lowId());
+                    } else if (immOpSize(src_mem.disp) == 8) {
+                        encoder.modRm_SIBDisp8(reg.lowId());
+                        encoder.sib_baseDisp8(src_reg.lowId());
+                        encoder.disp8(@intCast(i8, src_mem.disp));
+                    } else {
+                        encoder.modRm_SIBDisp32(reg.lowId());
+                        encoder.sib_baseDisp32(src_reg.lowId());
+                        encoder.disp32(src_mem.disp);
+                    }
+                } else {
+                    // mod=00 with rm=0b101 selects RIP-relative/disp32 addressing, so a base
+                    // with low id 5 (rbp/r13) must use the disp8 form even for a zero offset.
+                    if (src_mem.disp == 0 and src_reg.lowId() != 5) {
+                        encoder.modRm_indirectDisp0(reg.lowId(), src_reg.lowId());
+                    } else if (immOpSize(src_mem.disp) == 8) {
+                        encoder.modRm_indirectDisp8(reg.lowId(), src_reg.lowId());
+                        encoder.disp8(@intCast(i8, src_mem.disp));
+                    } else {
+                        encoder.modRm_indirectDisp32(reg.lowId(), src_reg.lowId());
+                        encoder.disp32(src_mem.disp);
+                    }
+                }
+            } else {
+                encoder.rex(.{
+                    .w = reg.size() == 64,
+                    .r = reg.isExtended(),
+                });
+                opc.encode(encoder);
+                if (src_mem.rip) {
+                    encoder.modRm_RIPDisp32(reg.lowId());
+                } else {
+                    encoder.modRm_SIBDisp0(reg.lowId());
+                    encoder.sib_disp32();
+                }
+                encoder.disp32(src_mem.disp);
+            }
         },
-        .none => {},
     }
 }
 
-fn mirDbgPrologueEnd(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
-    const tag = emit.mir.instructions.items(.tag)[inst];
-    assert(tag == .dbg_prologue_end);
-    switch (emit.debug_output) {
-        .dwarf => |dbg_out| {
-            try dbg_out.dbg_line.append(DW.LNS.set_prologue_end);
-            try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column);
+/// Lowers `tag` using the MR encoding: the destination `reg_or_mem` goes into ModRM.rm
+/// (plus SIB / displacement bytes for memory operands) and the source `reg` into ModRM.reg.
+///
+/// A 0x66 operand-size prefix is emitted for 16-bit operands in both the register and the
+/// memory form.
+///
+/// Errors:
+/// * `error.OperandSizeMismatch` - the two operands differ in size, or a memory base register
+///   is not 64 bits wide.
+/// * any error from `Encoder.init` is propagated.
+fn lowerToMrEnc(
+    tag: Tag,
+    reg_or_mem: RegisterOrMemory,
+    reg: Register,
+    code: *std.ArrayList(u8),
+) LoweringError!void {
+    const opc = getOpCode(tag, .mr, reg.size() == 8).?;
+    switch (reg_or_mem) {
+        .register => |dst_reg| {
+            if (dst_reg.size() != reg.size()) {
+                return error.OperandSizeMismatch;
+            }
+            // One byte more than before to leave room for the operand-size prefix.
+            const encoder = try Encoder.init(code, 4);
+            // Without the 0x66 prefix a 16-bit register pair would be encoded as its 32-bit alias.
+            if (reg.size() == 16) {
+                encoder.opcode_1byte(0x66);
+            }
+            encoder.rex(.{
+                .w = dst_reg.size() == 64,
+                .r = reg.isExtended(),
+                .b = dst_reg.isExtended(),
+            });
+            opc.encode(encoder);
+            encoder.modRm_direct(reg.lowId(), dst_reg.lowId());
         },
-        .plan9 => {},
-        .none => {},
-    }
-}
-
-fn mirDbgEpilogueBegin(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
-    const tag = emit.mir.instructions.items(.tag)[inst];
-    assert(tag == .dbg_epilogue_begin);
-    switch (emit.debug_output) {
-        .dwarf => |dbg_out| {
-            try dbg_out.dbg_line.append(DW.LNS.set_epilogue_begin);
-            try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column);
+        .memory => |dst_mem| {
+            if (dst_mem.ptr_size.size() != reg.size()) {
+                return error.OperandSizeMismatch;
+            }
+            const encoder = try Encoder.init(code, 9);
+            if (reg.size() == 16) {
+                encoder.opcode_1byte(0x66);
+            }
+            if (dst_mem.reg) |dst_reg| {
+                if (dst_reg.size() != 64) {
+                    return error.OperandSizeMismatch;
+                }
+                encoder.rex(.{
+                    .w = dst_mem.ptr_size == .qword_ptr,
+                    .r = reg.isExtended(),
+                    .b = dst_reg.isExtended(),
+                });
+                opc.encode(encoder);
+                // A base register with low id 4 (rsp/r12) can only be addressed through a SIB byte.
+                if (dst_reg.lowId() == 4) {
+                    if (dst_mem.disp == 0) {
+                        encoder.modRm_SIBDisp0(reg.lowId());
+                        encoder.sib_base(dst_reg.lowId());
+                    } else if (immOpSize(dst_mem.disp) == 8) {
+                        encoder.modRm_SIBDisp8(reg.lowId());
+                        encoder.sib_baseDisp8(dst_reg.lowId());
+                        encoder.disp8(@intCast(i8, dst_mem.disp));
+                    } else {
+                        encoder.modRm_SIBDisp32(reg.lowId());
+                        encoder.sib_baseDisp32(dst_reg.lowId());
+                        encoder.disp32(dst_mem.disp);
+                    }
+                } else {
+                    // mod=00 with rm=0b101 selects RIP-relative/disp32 addressing, so a base
+                    // with low id 5 (rbp/r13) must use the disp8 form even for a zero offset.
+                    if (dst_mem.disp == 0 and dst_reg.lowId() != 5) {
+                        encoder.modRm_indirectDisp0(reg.lowId(), dst_reg.lowId());
+                    } else if (immOpSize(dst_mem.disp) == 8) {
+                        encoder.modRm_indirectDisp8(reg.lowId(), dst_reg.lowId());
+                        encoder.disp8(@intCast(i8, dst_mem.disp));
+                    } else {
+                        encoder.modRm_indirectDisp32(reg.lowId(), dst_reg.lowId());
+                        encoder.disp32(dst_mem.disp);
+                    }
+                }
+            } else {
+                encoder.rex(.{
+                    .w = dst_mem.ptr_size == .qword_ptr,
+                    .r = reg.isExtended(),
+                });
+                opc.encode(encoder);
+                if (dst_mem.rip) {
+                    encoder.modRm_RIPDisp32(reg.lowId());
+                } else {
+                    encoder.modRm_SIBDisp0(reg.lowId());
+                    encoder.sib_disp32();
+                }
+                encoder.disp32(dst_mem.disp);
+            }
         },
-        .plan9 => {},
-        .none => {},
     }
 }
 
-fn mirArgDbgInfo(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
-    const tag = emit.mir.instructions.items(.tag)[inst];
-    assert(tag == .arg_dbg_info);
-    const payload = emit.mir.instructions.items(.data)[inst].payload;
-    const arg_dbg_info = emit.mir.extraData(Mir.ArgDbgInfo, payload).data;
-    const mcv = emit.mir.function.args[arg_dbg_info.arg_index];
-    try emit.genArgDbgInfo(arg_dbg_info.air_inst, mcv);
-}
-
-fn genArgDbgInfo(emit: *Emit, inst: Air.Inst.Index, mcv: MCValue) !void {
-    const ty_str = emit.mir.function.air.instructions.items(.data)[inst].ty_str;
-    const zir = &emit.mir.function.mod_fn.owner_decl.getFileScope().zir;
-    const name = zir.nullTerminatedString(ty_str.str);
-    const name_with_null = name.ptr[0 .. name.len + 1];
-    const ty = emit.mir.function.air.getRefType(ty_str.ty);
-
-    switch (mcv) {
-        .register => |reg| {
-            switch (emit.debug_output) {
-                .dwarf => |dbg_out| {
-                    try dbg_out.dbg_info.ensureUnusedCapacity(3);
-                    dbg_out.dbg_info.appendAssumeCapacity(link.File.Elf.abbrev_parameter);
-                    dbg_out.dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc
-                        1, // ULEB128 dwarf expression length
-                        reg.dwarfLocOp(),
-                    });
-                    try dbg_out.dbg_info.ensureUnusedCapacity(5 + name_with_null.len);
-                    try emit.addDbgInfoTypeReloc(ty); // DW.AT.type,  DW.FORM.ref4
-                    dbg_out.dbg_info.appendSliceAssumeCapacity(name_with_null); // DW.AT.name, DW.FORM.string
-                },
-                .plan9 => {},
-                .none => {},
+/// Lowers `tag` using the RMI encoding: `reg` goes into ModRM.reg, `reg_or_mem` into ModRM.rm
+/// (plus SIB / displacement bytes for memory operands), followed by the immediate `imm`.
+///
+/// The immediate is written as 16 bits for a 16-bit `reg` and as 32 bits for 32- and 64-bit
+/// registers. 8-bit operands are not supported.
+///
+/// Errors:
+/// * `error.OperandSizeMismatch` - `reg` is 8 bits wide, a register source differs in size
+///   from `reg`, a memory source is `.byte_ptr`, or a memory base register is not 64 bits wide.
+/// * any error from `Encoder.init` or `math.cast` is propagated.
+fn lowerToRmiEnc(
+    tag: Tag,
+    reg: Register,
+    reg_or_mem: RegisterOrMemory,
+    imm: i32,
+    code: *std.ArrayList(u8),
+) LoweringError!void {
+    if (reg.size() == 8) {
+        return error.OperandSizeMismatch;
+    }
+    const opc = getOpCode(tag, .rmi, false).?;
+    const encoder = try Encoder.init(code, 13);
+    if (reg.size() == 16) {
+        encoder.opcode_1byte(0x66);
+    }
+    switch (reg_or_mem) {
+        .register => |src_reg| {
+            if (reg.size() != src_reg.size()) {
+                return error.OperandSizeMismatch;
             }
+            encoder.rex(.{
+                .w = reg.size() == 64,
+                .r = reg.isExtended(),
+                .b = src_reg.isExtended(),
+            });
+            opc.encode(encoder);
+            encoder.modRm_direct(reg.lowId(), src_reg.lowId());
         },
-        .stack_offset => {
-            switch (emit.debug_output) {
-                .dwarf => {},
-                .plan9 => {},
-                .none => {},
+        .memory => |src_mem| {
+            // Byte-sized sources are rejected for every addressing form, not only when a base
+            // register is present.
+            if (src_mem.ptr_size == .byte_ptr) {
+                return error.OperandSizeMismatch;
+            }
+            if (src_mem.reg) |src_reg| {
+                // TODO handle 32-bit base register - requires prefix 0x67
+                // Intel Manual, Vol 1, chapter 3.6 and 3.6.1
+                if (src_reg.size() != 64) {
+                    return error.OperandSizeMismatch;
+                }
+                encoder.rex(.{
+                    .w = reg.size() == 64,
+                    .r = reg.isExtended(),
+                    .b = src_reg.isExtended(),
+                });
+                opc.encode(encoder);
+                // A base register with low id 4 (rsp/r12) can only be addressed through a SIB byte.
+                if (src_reg.lowId() == 4) {
+                    if (src_mem.disp == 0) {
+                        encoder.modRm_SIBDisp0(reg.lowId());
+                        encoder.sib_base(src_reg.lowId());
+                    } else if (immOpSize(src_mem.disp) == 8) {
+                        encoder.modRm_SIBDisp8(reg.lowId());
+                        encoder.sib_baseDisp8(src_reg.lowId());
+                        encoder.disp8(@intCast(i8, src_mem.disp));
+                    } else {
+                        encoder.modRm_SIBDisp32(reg.lowId());
+                        encoder.sib_baseDisp32(src_reg.lowId());
+                        encoder.disp32(src_mem.disp);
+                    }
+                } else {
+                    // mod=00 with rm=0b101 selects RIP-relative/disp32 addressing, so a base
+                    // with low id 5 (rbp/r13) must use the disp8 form even for a zero offset.
+                    if (src_mem.disp == 0 and src_reg.lowId() != 5) {
+                        encoder.modRm_indirectDisp0(reg.lowId(), src_reg.lowId());
+                    } else if (immOpSize(src_mem.disp) == 8) {
+                        encoder.modRm_indirectDisp8(reg.lowId(), src_reg.lowId());
+                        encoder.disp8(@intCast(i8, src_mem.disp));
+                    } else {
+                        encoder.modRm_indirectDisp32(reg.lowId(), src_reg.lowId());
+                        encoder.disp32(src_mem.disp);
+                    }
+                }
+            } else {
+                encoder.rex(.{
+                    .w = reg.size() == 64,
+                    .r = reg.isExtended(),
+                });
+                opc.encode(encoder);
+                if (src_mem.rip) {
+                    encoder.modRm_RIPDisp32(reg.lowId());
+                } else {
+                    encoder.modRm_SIBDisp0(reg.lowId());
+                    encoder.sib_disp32();
+                }
+                encoder.disp32(src_mem.disp);
             }
         },
-        else => {},
     }
-}
-
-/// Adds a Type to the .debug_info at the current position. The bytes will be populated later,
-/// after codegen for this symbol is done.
-fn addDbgInfoTypeReloc(emit: *Emit, ty: Type) !void {
-    switch (emit.debug_output) {
-        .dwarf => |dbg_out| {
-            assert(ty.hasCodeGenBits());
-            const index = dbg_out.dbg_info.items.len;
-            try dbg_out.dbg_info.resize(index + 4); // DW.AT.type,  DW.FORM.ref4
-
-            const gop = try dbg_out.dbg_info_type_relocs.getOrPut(emit.bin_file.allocator, ty);
-            if (!gop.found_existing) {
-                gop.value_ptr.* = .{
-                    .off = undefined,
-                    .relocs = .{},
-                };
-            }
-            try gop.value_ptr.relocs.append(emit.bin_file.allocator, @intCast(u32, index));
+    switch (reg.size()) {
+        // TODO 8bit immediate
+        8 => unreachable,
+        16 => {
+            const imm16 = try math.cast(i16, imm);
+            encoder.imm16(imm16);
         },
-        .plan9 => {},
-        .none => {},
+        32, 64 => encoder.imm32(imm),
+        else => unreachable,
     }
 }
 
@@ -1757,34 +1919,44 @@ test "lower MI encoding" {
     defer code.deinit();
     try lowerToMiEnc(.mov, RegisterOrMemory.reg(.rax), 0x10, code.buffer());
     try expectEqualHexStrings("\x48\xc7\xc0\x10\x00\x00\x00", code.emitted(), "mov rax, 0x10");
-    try lowerToMiEnc(.mov, RegisterOrMemory.mem(.r11, 0), 0x10, code.buffer());
+    try lowerToMiEnc(.mov, RegisterOrMemory.mem(.r11, 0, .dword_ptr), 0x10, code.buffer());
     try expectEqualHexStrings("\x41\xc7\x03\x10\x00\x00\x00", code.emitted(), "mov dword ptr [r11 + 0], 0x10");
-    try lowerToMiEnc(.add, RegisterOrMemory.mem(.rdx, -8), 0x10, code.buffer());
+    try lowerToMiEnc(.add, RegisterOrMemory.mem(.rdx, -8, .dword_ptr), 0x10, code.buffer());
     try expectEqualHexStrings("\x81\x42\xF8\x10\x00\x00\x00", code.emitted(), "add dword ptr [rdx - 8], 0x10");
-    try lowerToMiEnc(.sub, RegisterOrMemory.mem(.r11, 0x10000000), 0x10, code.buffer());
+    try lowerToMiEnc(.sub, RegisterOrMemory.mem(.r11, 0x10000000, .dword_ptr), 0x10, code.buffer());
     try expectEqualHexStrings(
         "\x41\x81\xab\x00\x00\x00\x10\x10\x00\x00\x00",
         code.emitted(),
         "sub dword ptr [r11 + 0x10000000], 0x10",
     );
-    try lowerToMiEnc(.@"and", RegisterOrMemory.mem(null, 0x10000000), 0x10, code.buffer());
+    try lowerToMiEnc(.@"and", RegisterOrMemory.mem(null, 0x10000000, .dword_ptr), 0x10, code.buffer());
     try expectEqualHexStrings(
         "\x81\x24\x25\x00\x00\x00\x10\x10\x00\x00\x00",
         code.emitted(),
         "and dword ptr [ds:0x10000000], 0x10",
     );
-    try lowerToMiEnc(.@"and", RegisterOrMemory.mem(.r12, 0x10000000), 0x10, code.buffer());
+    try lowerToMiEnc(.@"and", RegisterOrMemory.mem(.r12, 0x10000000, .dword_ptr), 0x10, code.buffer());
     try expectEqualHexStrings(
         "\x41\x81\xA4\x24\x00\x00\x00\x10\x10\x00\x00\x00",
         code.emitted(),
         "and dword ptr [r12 + 0x10000000], 0x10",
     );
-    try lowerToMiEnc(.mov, RegisterOrMemory.rip(0x10), 0x10, code.buffer());
+    try lowerToMiEnc(.mov, RegisterOrMemory.rip(0x10, .qword_ptr), 0x10, code.buffer());
     try expectEqualHexStrings(
         "\xC7\x05\x10\x00\x00\x00\x10\x00\x00\x00",
         code.emitted(),
-        "mov [rip + 0x10], 0x10",
+        "mov qword ptr [rip + 0x10], 0x10",
+    );
+    try lowerToMiEnc(.mov, RegisterOrMemory.mem(.rbp, -8, .qword_ptr), 0x10, code.buffer());
+    try expectEqualHexStrings(
+        "\x48\xc7\x45\xf8\x10\x00\x00\x00",
+        code.emitted(),
+        "mov qword ptr [rbp - 8], 0x10",
     );
+    try lowerToMiEnc(.mov, RegisterOrMemory.mem(.rbp, -2, .word_ptr), 0x10, code.buffer());
+    try expectEqualHexStrings("\x66\xC7\x45\xFE\x10\x00", code.emitted(), "mov word ptr [rbp - 2], 0x10");
+    try lowerToMiEnc(.mov, RegisterOrMemory.mem(.rbp, -1, .byte_ptr), 0x10, code.buffer());
+    try expectEqualHexStrings("\xC6\x45\xFF\x10", code.emitted(), "mov byte ptr [rbp - 1], 0x10");
 }
 
 test "lower RM encoding" {
@@ -1792,35 +1964,35 @@ test "lower RM encoding" {
     defer code.deinit();
     try lowerToRmEnc(.mov, .rax, RegisterOrMemory.reg(.rbx), code.buffer());
     try expectEqualHexStrings("\x48\x8b\xc3", code.emitted(), "mov rax, rbx");
-    try lowerToRmEnc(.mov, .rax, RegisterOrMemory.mem(.r11, 0), code.buffer());
+    try lowerToRmEnc(.mov, .rax, RegisterOrMemory.mem(.r11, 0, .qword_ptr), code.buffer());
     try expectEqualHexStrings("\x49\x8b\x03", code.emitted(), "mov rax, qword ptr [r11 + 0]");
-    try lowerToRmEnc(.add, .r11, RegisterOrMemory.mem(null, 0x10000000), code.buffer());
+    try lowerToRmEnc(.add, .r11, RegisterOrMemory.mem(null, 0x10000000, .qword_ptr), code.buffer());
     try expectEqualHexStrings(
         "\x4C\x03\x1C\x25\x00\x00\x00\x10",
         code.emitted(),
         "add r11, qword ptr [ds:0x10000000]",
     );
-    try lowerToRmEnc(.add, .r12b, RegisterOrMemory.mem(null, 0x10000000), code.buffer());
+    try lowerToRmEnc(.add, .r12b, RegisterOrMemory.mem(null, 0x10000000, .byte_ptr), code.buffer());
     try expectEqualHexStrings(
         "\x44\x02\x24\x25\x00\x00\x00\x10",
         code.emitted(),
         "add r12b, byte ptr [ds:0x10000000]",
     );
-    try lowerToRmEnc(.sub, .r11, RegisterOrMemory.mem(.r13, 0x10000000), code.buffer());
+    try lowerToRmEnc(.sub, .r11, RegisterOrMemory.mem(.r13, 0x10000000, .qword_ptr), code.buffer());
     try expectEqualHexStrings(
         "\x4D\x2B\x9D\x00\x00\x00\x10",
         code.emitted(),
         "sub r11, qword ptr [r13 + 0x10000000]",
     );
-    try lowerToRmEnc(.sub, .r11, RegisterOrMemory.mem(.r12, 0x10000000), code.buffer());
+    try lowerToRmEnc(.sub, .r11, RegisterOrMemory.mem(.r12, 0x10000000, .qword_ptr), code.buffer());
     try expectEqualHexStrings(
         "\x4D\x2B\x9C\x24\x00\x00\x00\x10",
         code.emitted(),
         "sub r11, qword ptr [r12 + 0x10000000]",
     );
-    try lowerToRmEnc(.mov, .rax, RegisterOrMemory.mem(.rbp, -4), code.buffer());
+    try lowerToRmEnc(.mov, .rax, RegisterOrMemory.mem(.rbp, -4, .qword_ptr), code.buffer());
     try expectEqualHexStrings("\x48\x8B\x45\xFC", code.emitted(), "mov rax, qword ptr [rbp - 4]");
-    try lowerToRmEnc(.lea, .rax, RegisterOrMemory.rip(0x10), code.buffer());
+    try lowerToRmEnc(.lea, .rax, RegisterOrMemory.rip(0x10, .qword_ptr), code.buffer());
     try expectEqualHexStrings("\x48\x8D\x05\x10\x00\x00\x00", code.emitted(), "lea rax, [rip + 0x10]");
 }
 
@@ -1829,27 +2001,27 @@ test "lower MR encoding" {
     defer code.deinit();
     try lowerToMrEnc(.mov, RegisterOrMemory.reg(.rax), .rbx, code.buffer());
     try expectEqualHexStrings("\x48\x89\xd8", code.emitted(), "mov rax, rbx");
-    try lowerToMrEnc(.mov, RegisterOrMemory.mem(.rbp, -4), .r11, code.buffer());
+    try lowerToMrEnc(.mov, RegisterOrMemory.mem(.rbp, -4, .qword_ptr), .r11, code.buffer());
     try expectEqualHexStrings("\x4c\x89\x5d\xfc", code.emitted(), "mov qword ptr [rbp - 4], r11");
-    try lowerToMrEnc(.add, RegisterOrMemory.mem(null, 0x10000000), .r12b, code.buffer());
+    try lowerToMrEnc(.add, RegisterOrMemory.mem(null, 0x10000000, .byte_ptr), .r12b, code.buffer());
     try expectEqualHexStrings(
         "\x44\x00\x24\x25\x00\x00\x00\x10",
         code.emitted(),
         "add byte ptr [ds:0x10000000], r12b",
     );
-    try lowerToMrEnc(.add, RegisterOrMemory.mem(null, 0x10000000), .r12d, code.buffer());
+    try lowerToMrEnc(.add, RegisterOrMemory.mem(null, 0x10000000, .dword_ptr), .r12d, code.buffer());
     try expectEqualHexStrings(
         "\x44\x01\x24\x25\x00\x00\x00\x10",
         code.emitted(),
         "add dword ptr [ds:0x10000000], r12d",
     );
-    try lowerToMrEnc(.sub, RegisterOrMemory.mem(.r11, 0x10000000), .r12, code.buffer());
+    try lowerToMrEnc(.sub, RegisterOrMemory.mem(.r11, 0x10000000, .qword_ptr), .r12, code.buffer());
     try expectEqualHexStrings(
         "\x4D\x29\xA3\x00\x00\x00\x10",
         code.emitted(),
         "sub qword ptr [r11 + 0x10000000], r12",
     );
-    try lowerToMrEnc(.mov, RegisterOrMemory.rip(0x10), .r12, code.buffer());
+    try lowerToMrEnc(.mov, RegisterOrMemory.rip(0x10, .qword_ptr), .r12, code.buffer());
     try expectEqualHexStrings("\x4C\x89\x25\x10\x00\x00\x00", code.emitted(), "mov qword ptr [rip + 0x10], r12");
 }
 
@@ -1898,19 +2070,23 @@ test "lower M encoding" {
     defer code.deinit();
     try lowerToMEnc(.jmp_near, RegisterOrMemory.reg(.r12), code.buffer());
     try expectEqualHexStrings("\x41\xFF\xE4", code.emitted(), "jmp r12");
-    try lowerToMEnc(.jmp_near, RegisterOrMemory.mem(.r12, 0), code.buffer());
+    try lowerToMEnc(.jmp_near, RegisterOrMemory.reg(.r12w), code.buffer());
+    try expectEqualHexStrings("\x66\x41\xFF\xE4", code.emitted(), "jmp r12w");
+    try lowerToMEnc(.jmp_near, RegisterOrMemory.mem(.r12, 0, .qword_ptr), code.buffer());
     try expectEqualHexStrings("\x41\xFF\x24\x24", code.emitted(), "jmp qword ptr [r12]");
-    try lowerToMEnc(.jmp_near, RegisterOrMemory.mem(.r12, 0x10), code.buffer());
+    try lowerToMEnc(.jmp_near, RegisterOrMemory.mem(.r12, 0, .word_ptr), code.buffer());
+    try expectEqualHexStrings("\x66\x41\xFF\x24\x24", code.emitted(), "jmp word ptr [r12]");
+    try lowerToMEnc(.jmp_near, RegisterOrMemory.mem(.r12, 0x10, .qword_ptr), code.buffer());
     try expectEqualHexStrings("\x41\xFF\x64\x24\x10", code.emitted(), "jmp qword ptr [r12 + 0x10]");
-    try lowerToMEnc(.jmp_near, RegisterOrMemory.mem(.r12, 0x1000), code.buffer());
+    try lowerToMEnc(.jmp_near, RegisterOrMemory.mem(.r12, 0x1000, .qword_ptr), code.buffer());
     try expectEqualHexStrings(
         "\x41\xFF\xA4\x24\x00\x10\x00\x00",
         code.emitted(),
         "jmp qword ptr [r12 + 0x1000]",
     );
-    try lowerToMEnc(.jmp_near, RegisterOrMemory.rip(0x10), code.buffer());
+    try lowerToMEnc(.jmp_near, RegisterOrMemory.rip(0x10, .qword_ptr), code.buffer());
     try expectEqualHexStrings("\xFF\x25\x10\x00\x00\x00", code.emitted(), "jmp qword ptr [rip + 0x10]");
-    try lowerToMEnc(.jmp_near, RegisterOrMemory.mem(null, 0x10), code.buffer());
+    try lowerToMEnc(.jmp_near, RegisterOrMemory.mem(null, 0x10, .qword_ptr), code.buffer());
     try expectEqualHexStrings("\xFF\x24\x25\x10\x00\x00\x00", code.emitted(), "jmp qword ptr [ds:0x10]");
     try lowerToMEnc(.seta, RegisterOrMemory.reg(.r11b), code.buffer());
     try expectEqualHexStrings("\x49\x0F\x97\xC3", code.emitted(), "seta r11b");
@@ -1928,8 +2104,18 @@ test "lower O encoding" {
 test "lower RMI encoding" {
     var code = TestEmitCode.init();
     defer code.deinit();
-    try lowerToRmiEnc(.imul, .rax, RegisterOrMemory.mem(.rbp, -8), 0x10, code.buffer());
-    try expectEqualHexStrings("\x48\x69\x45\xF8\x10\x00\x00\x00", code.emitted(), "imul rax, [rbp - 8], 0x10");
+    try lowerToRmiEnc(.imul, .rax, RegisterOrMemory.mem(.rbp, -8, .qword_ptr), 0x10, code.buffer());
+    try expectEqualHexStrings(
+        "\x48\x69\x45\xF8\x10\x00\x00\x00",
+        code.emitted(),
+        "imul rax, qword ptr [rbp - 8], 0x10",
+    );
+    try lowerToRmiEnc(.imul, .eax, RegisterOrMemory.mem(.rbp, -4, .dword_ptr), 0x10, code.buffer());
+    try expectEqualHexStrings("\x69\x45\xFC\x10\x00\x00\x00", code.emitted(), "imul eax, [rbp - 4], 0x10");
+    try lowerToRmiEnc(.imul, .ax, RegisterOrMemory.mem(.rbp, -2, .word_ptr), 0x10, code.buffer());
+    try expectEqualHexStrings("\x66\x69\x45\xFE\x10\x00", code.emitted(), "imul ax, [rbp - 2], 0x10");
     try lowerToRmiEnc(.imul, .r12, RegisterOrMemory.reg(.r12), 0x10, code.buffer());
     try expectEqualHexStrings("\x4D\x69\xE4\x10\x00\x00\x00", code.emitted(), "imul r12, r12, 0x10");
+    try lowerToRmiEnc(.imul, .r12w, RegisterOrMemory.reg(.r12w), 0x10, code.buffer());
+    try expectEqualHexStrings("\x66\x45\x69\xE4\x10\x00", code.emitted(), "imul r12w, r12w, 0x10");
 }
src/arch/x86_64/Mir.zig
@@ -38,13 +38,18 @@ pub const Inst = struct {
         ///       0b01  reg1, [reg2 + imm32]
         ///       0b01  reg1, [ds:imm32]
         ///       0b10  [reg1 + imm32], reg2
-        ///       0b10  [reg1 + 0], imm32
-        ///       0b11  [reg1 + imm32], imm32
         /// Notes:
         ///  * If reg2 is `none` then it means Data field `imm` is used as the immediate.
         ///  * When two imm32 values are required, Data field `payload` points at `ImmPair`.
         adc,
 
+        /// ops flags: form:
+        ///       0b00 byte ptr [reg1 + imm32], imm8
+        ///       0b01 word ptr [reg1 + imm32], imm16
+        ///       0b10 dword ptr [reg1 + imm32], imm32
+        ///       0b11 qword ptr [reg1 + imm32], imm32 (sign-extended to imm64)
+        adc_mem_imm,
+
         /// form: reg1, [reg2 + scale*rcx + imm32]
         /// ops flags  scale
         ///      0b00      1
@@ -77,74 +82,95 @@ pub const Inst = struct {
         // The following instructions all have the same encoding as `adc`.
 
         add,
+        add_mem_imm,
         add_scale_src,
         add_scale_dst,
         add_scale_imm,
         sub,
+        sub_mem_imm,
         sub_scale_src,
         sub_scale_dst,
         sub_scale_imm,
         xor,
+        xor_mem_imm,
         xor_scale_src,
         xor_scale_dst,
         xor_scale_imm,
         @"and",
+        and_mem_imm,
         and_scale_src,
         and_scale_dst,
         and_scale_imm,
         @"or",
+        or_mem_imm,
         or_scale_src,
         or_scale_dst,
         or_scale_imm,
         rol,
+        rol_mem_imm,
         rol_scale_src,
         rol_scale_dst,
         rol_scale_imm,
         ror,
+        ror_mem_imm,
         ror_scale_src,
         ror_scale_dst,
         ror_scale_imm,
         rcl,
+        rcl_mem_imm,
         rcl_scale_src,
         rcl_scale_dst,
         rcl_scale_imm,
         rcr,
+        rcr_mem_imm,
         rcr_scale_src,
         rcr_scale_dst,
         rcr_scale_imm,
         shl,
+        shl_mem_imm,
         shl_scale_src,
         shl_scale_dst,
         shl_scale_imm,
         sal,
+        sal_mem_imm,
         sal_scale_src,
         sal_scale_dst,
         sal_scale_imm,
         shr,
+        shr_mem_imm,
         shr_scale_src,
         shr_scale_dst,
         shr_scale_imm,
         sar,
+        sar_mem_imm,
         sar_scale_src,
         sar_scale_dst,
         sar_scale_imm,
         sbb,
+        sbb_mem_imm,
         sbb_scale_src,
         sbb_scale_dst,
         sbb_scale_imm,
         cmp,
+        cmp_mem_imm,
         cmp_scale_src,
         cmp_scale_dst,
         cmp_scale_imm,
         mov,
+        mov_mem_imm,
         mov_scale_src,
         mov_scale_dst,
         mov_scale_imm,
 
+        /// ops flags: form:
+        ///      0b00  reg1, [reg2 + imm32]
+        ///      0b00  reg1, [ds:imm32]
+        ///      0b01  reg1, [rip + imm32]
+        ///      0b10  reg1, [rip + reloc]
+        /// Notes:
+        /// * if flags are 0b10, `Data` contains `got_entry` for the linker to generate
+        /// a valid relocation for.
         lea,
-        lea_scale_src,
-        lea_scale_dst,
-        lea_scale_imm,
 
         /// ops flags: form:
         ///      0bX0  reg1
@@ -160,15 +186,6 @@ pub const Inst = struct {
         ///      0b11  reg1, [reg2 + imm32], imm32
         imul_complex,
 
-        /// ops flags:  form:
-        ///      0bX0   reg1, [rip + imm32]
-        ///      0bX1   reg1, [rip + reloc]
-        /// Notes:
-        /// * if flags are 0bX1, `Data` contains `got_entry` for linker to generate
-        ///   valid relocation for.
-        /// TODO handle more cases
-        lea_rip,
-
         /// ops flags:  form:
         ///      0bX0   reg1, imm64
         ///      0bX1   rax, moffs64
@@ -233,16 +250,8 @@ pub const Inst = struct {
         syscall,
 
         /// ops flags:  form:
-        ///       0b00  reg1, reg2
         ///       0b00  reg1, imm32
-        ///       0b01  reg1, [reg2 + imm32]
-        ///       0b01  reg1, [ds:imm32]
-        ///       0b10  [reg1 + imm32], reg2
-        ///       0b10  [reg1 + 0], imm32
-        ///       0b11  [reg1 + imm32], imm32
-        /// Notes:
-        ///  * If reg2 is `none` then it means Data field `imm` is used as the immediate.
-        ///  * When two imm32 values are required, Data field `payload` points at `ImmPair`.
+        /// TODO handle more cases
         @"test",
 
         /// Breakpoint
src/arch/x86_64/PrintMir.zig
@@ -64,6 +64,15 @@ pub fn printMir(print: *const Print, w: anytype, mir_to_air_map: std.AutoHashMap
             .sbb => try print.mirArith(.sbb, inst, w),
             .cmp => try print.mirArith(.cmp, inst, w),
 
+            .adc_mem_imm => try print.mirArithMemImm(.adc, inst, w),
+            .add_mem_imm => try print.mirArithMemImm(.add, inst, w),
+            .sub_mem_imm => try print.mirArithMemImm(.sub, inst, w),
+            .xor_mem_imm => try print.mirArithMemImm(.xor, inst, w),
+            .and_mem_imm => try print.mirArithMemImm(.@"and", inst, w),
+            .or_mem_imm => try print.mirArithMemImm(.@"or", inst, w),
+            .sbb_mem_imm => try print.mirArithMemImm(.sbb, inst, w),
+            .cmp_mem_imm => try print.mirArithMemImm(.cmp, inst, w),
+
             .adc_scale_src => try print.mirArithScaleSrc(.adc, inst, w),
             .add_scale_src => try print.mirArithScaleSrc(.add, inst, w),
             .sub_scale_src => try print.mirArithScaleSrc(.sub, inst, w),
@@ -98,7 +107,6 @@ pub fn printMir(print: *const Print, w: anytype, mir_to_air_map: std.AutoHashMap
             .movabs => try print.mirMovabs(inst, w),
 
             .lea => try print.mirLea(inst, w),
-            .lea_rip => try print.mirLeaRip(inst, w),
 
             .imul_complex => try print.mirIMulComplex(inst, w),
 
@@ -490,6 +498,13 @@ fn mirArith(print: *const Print, tag: Mir.Inst.Tag, inst: Mir.Inst.Index, w: any
     try w.writeByte('\n');
 }
 
+fn mirArithMemImm(print: *const Print, tag: Mir.Inst.Tag, inst: Mir.Inst.Index, w: anytype) !void {
+    _ = print;
+    _ = tag;
+    _ = inst;
+    return w.writeAll("TODO mirArithMemImm\n");
+}
+
 fn mirArithScaleSrc(print: *const Print, tag: Mir.Inst.Tag, inst: Mir.Inst.Index, w: anytype) !void {
     const ops = Mir.Ops.decode(print.mir.instructions.items(.ops)[inst]);
     const scale = ops.flags;
@@ -560,19 +575,16 @@ fn mirIMulComplex(print: *const Print, inst: Mir.Inst.Index, w: anytype) !void {
 }
 
 fn mirLea(print: *const Print, inst: Mir.Inst.Index, w: anytype) !void {
-    const tag = print.mir.instructions.items(.tag)[inst];
-    assert(tag == .lea);
-    const ops = Mir.Ops.decode(print.mir.instructions.items(.ops)[inst]);
-    assert(ops.flags == 0b01);
-    const imm = print.mir.instructions.items(.data)[inst].imm;
-
-    try w.print("lea {s} [{s} + {d}]\n", .{ @tagName(ops.reg1), @tagName(ops.reg2), imm });
-}
-
-fn mirLeaRip(print: *const Print, inst: Mir.Inst.Index, w: anytype) !void {
     _ = print;
     _ = inst;
-    return w.writeAll("TODO lea_rip\n");
+    return w.writeAll("TODO lea\n");
+    // const tag = print.mir.instructions.items(.tag)[inst];
+    // assert(tag == .lea);
+    // const ops = Mir.Ops.decode(print.mir.instructions.items(.ops)[inst]);
+    // assert(ops.flags == 0b01);
+    // const imm = print.mir.instructions.items(.data)[inst].imm;
+
+    // try w.print("lea {s} [{s} + {d}]\n", .{ @tagName(ops.reg1), @tagName(ops.reg2), imm });
 }
 
 fn mirCallExtern(print: *const Print, inst: Mir.Inst.Index, w: anytype) !void {