Commit d064cf639f

Jacob Young <jacobly0@users.noreply.github.com>
2023-03-25 13:58:00
x86_64: implement 128-bit shifts
1 parent 77300c0
src/arch/x86_64/CodeGen.zig
@@ -409,10 +409,7 @@ fn asmSetccRegister(self: *Self, reg: Register, cc: bits.Condition) !void {
     _ = try self.addInst(.{
         .tag = .setcc,
         .ops = .r_cc,
-        .data = .{ .r_cc = .{
-            .r1 = reg,
-            .cc = cc,
-        } },
+        .data = .{ .r_cc = .{ .r = reg, .cc = cc } },
     });
 }
 
@@ -424,14 +421,11 @@ fn asmSetccMemory(self: *Self, m: Memory, cc: bits.Condition) !void {
             .rip => .m_rip_cc,
             else => unreachable,
         },
-        .data = .{ .x_cc = .{
-            .payload = switch (m) {
-                .sib => try self.addExtra(Mir.MemorySib.encode(m)),
-                .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
-                else => unreachable,
-            },
-            .cc = cc,
-        } },
+        .data = .{ .x_cc = .{ .cc = cc, .payload = switch (m) {
+            .sib => try self.addExtra(Mir.MemorySib.encode(m)),
+            .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
+            else => unreachable,
+        } } },
     });
 }
 
@@ -439,11 +433,7 @@ fn asmCmovccRegisterRegister(self: *Self, reg1: Register, reg2: Register, cc: bi
     _ = try self.addInst(.{
         .tag = .cmovcc,
         .ops = .rr_cc,
-        .data = .{ .rr_cc = .{
-            .r1 = reg1,
-            .r2 = reg2,
-            .cc = cc,
-        } },
+        .data = .{ .rr_cc = .{ .r1 = reg1, .r2 = reg2, .cc = cc } },
     });
 }
 
@@ -455,15 +445,11 @@ fn asmCmovccRegisterMemory(self: *Self, reg: Register, m: Memory, cc: bits.Condi
             .rip => .rm_rip_cc,
             else => unreachable,
         },
-        .data = .{ .rx_cc = .{
-            .r1 = reg,
-            .cc = cc,
-            .payload = switch (m) {
-                .sib => try self.addExtra(Mir.MemorySib.encode(m)),
-                .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
-                else => unreachable,
-            },
-        } },
+        .data = .{ .rx_cc = .{ .r = reg, .cc = cc, .payload = switch (m) {
+            .sib => try self.addExtra(Mir.MemorySib.encode(m)),
+            .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
+            else => unreachable,
+        } } },
     });
 }
 
@@ -479,10 +465,7 @@ fn asmJccReloc(self: *Self, target: Mir.Inst.Index, cc: bits.Condition) !Mir.Ins
     return self.addInst(.{
         .tag = .jcc,
         .ops = .inst_cc,
-        .data = .{ .inst_cc = .{
-            .inst = target,
-            .cc = cc,
-        } },
+        .data = .{ .inst_cc = .{ .inst = target, .cc = cc } },
     });
 }
 
@@ -503,13 +486,15 @@ fn asmRegister(self: *Self, tag: Mir.Inst.Tag, reg: Register) !void {
 }
 
 fn asmImmediate(self: *Self, tag: Mir.Inst.Tag, imm: Immediate) !void {
-    const ops: Mir.Inst.Ops = if (imm == .signed) .imm_s else .imm_u;
     _ = try self.addInst(.{
         .tag = tag,
-        .ops = ops,
-        .data = .{ .imm = switch (imm) {
-            .signed => |x| @bitCast(u32, x),
-            .unsigned => |x| @intCast(u32, x),
+        .ops = switch (imm) {
+            .signed => .i_s,
+            .unsigned => .i_u,
+        },
+        .data = .{ .i = switch (imm) {
+            .signed => |s| @bitCast(u32, s),
+            .unsigned => |u| @intCast(u32, u),
         } },
     });
 }
@@ -518,37 +503,43 @@ fn asmRegisterRegister(self: *Self, tag: Mir.Inst.Tag, reg1: Register, reg2: Reg
     _ = try self.addInst(.{
         .tag = tag,
         .ops = .rr,
-        .data = .{ .rr = .{
-            .r1 = reg1,
-            .r2 = reg2,
-        } },
+        .data = .{ .rr = .{ .r1 = reg1, .r2 = reg2 } },
     });
 }
 
 fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.Tag, reg: Register, imm: Immediate) !void {
     const ops: Mir.Inst.Ops = switch (imm) {
         .signed => .ri_s,
-        .unsigned => |x| if (x <= math.maxInt(u32)) .ri_u else .ri64,
-    };
-    const data: Mir.Inst.Data = switch (ops) {
-        .ri_s => .{ .ri = .{
-            .r1 = reg,
-            .imm = @bitCast(u32, imm.signed),
-        } },
-        .ri_u => .{ .ri = .{
-            .r1 = reg,
-            .imm = @intCast(u32, imm.unsigned),
-        } },
-        .ri64 => .{ .rx = .{
-            .r1 = reg,
-            .payload = try self.addExtra(Mir.Imm64.encode(imm.unsigned)),
-        } },
-        else => unreachable,
+        .unsigned => |u| if (math.cast(u32, u)) |_| .ri_u else .ri64,
     };
     _ = try self.addInst(.{
         .tag = tag,
         .ops = ops,
-        .data = data,
+        .data = switch (ops) {
+            .ri_s, .ri_u => .{ .ri = .{ .r = reg, .i = switch (imm) {
+                .signed => |s| @bitCast(u32, s),
+                .unsigned => |u| @intCast(u32, u),
+            } } },
+            .ri64 => .{ .rx = .{
+                .r = reg,
+                .payload = try self.addExtra(Mir.Imm64.encode(imm.unsigned)),
+            } },
+            else => unreachable,
+        },
+    });
+}
+
+fn asmRegisterRegisterRegister(
+    self: *Self,
+    tag: Mir.Inst.Tag,
+    reg1: Register,
+    reg2: Register,
+    reg3: Register,
+) !void {
+    _ = try self.addInst(.{
+        .tag = tag,
+        .ops = .rrr,
+        .data = .{ .rrr = .{ .r1 = reg1, .r2 = reg2, .r3 = reg3 } },
     });
 }
 
@@ -559,109 +550,142 @@ fn asmRegisterRegisterImmediate(
     reg2: Register,
     imm: Immediate,
 ) !void {
-    const ops: Mir.Inst.Ops = switch (imm) {
-        .signed => .rri_s,
-        .unsigned => .rri_u,
-    };
-    const data: Mir.Inst.Data = switch (ops) {
-        .rri_s => .{ .rri = .{
-            .r1 = reg1,
-            .r2 = reg2,
-            .imm = @bitCast(u32, imm.signed),
-        } },
-        .rri_u => .{ .rri = .{
-            .r1 = reg1,
-            .r2 = reg2,
-            .imm = @intCast(u32, imm.unsigned),
-        } },
-        else => unreachable,
-    };
     _ = try self.addInst(.{
         .tag = tag,
-        .ops = ops,
-        .data = data,
+        .ops = switch (imm) {
+            .signed => .rri_s,
+            .unsigned => .rri_u,
+        },
+        .data = .{ .rri = .{ .r1 = reg1, .r2 = reg2, .i = switch (imm) {
+            .signed => |s| @bitCast(u32, s),
+            .unsigned => |u| @intCast(u32, u),
+        } } },
     });
 }
 
 fn asmMemory(self: *Self, tag: Mir.Inst.Tag, m: Memory) !void {
-    const ops: Mir.Inst.Ops = switch (m) {
-        .sib => .m_sib,
-        .rip => .m_rip,
-        else => unreachable,
-    };
-    const data: Mir.Inst.Data = .{ .payload = switch (ops) {
-        .m_sib => try self.addExtra(Mir.MemorySib.encode(m)),
-        .m_rip => try self.addExtra(Mir.MemoryRip.encode(m)),
-        else => unreachable,
-    } };
     _ = try self.addInst(.{
         .tag = tag,
-        .ops = ops,
-        .data = data,
+        .ops = switch (m) {
+            .sib => .m_sib,
+            .rip => .m_rip,
+            else => unreachable,
+        },
+        .data = .{ .payload = switch (m) {
+            .sib => try self.addExtra(Mir.MemorySib.encode(m)),
+            .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
+            else => unreachable,
+        } },
     });
 }
 
-fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.Tag, m: Memory, imm: Immediate) !void {
-    const ops: Mir.Inst.Ops = switch (m) {
-        .sib => if (imm == .signed) .mi_s_sib else .mi_u_sib,
-        .rip => if (imm == .signed) .mi_s_rip else .mi_u_rip,
-        else => unreachable,
-    };
-    const payload: u32 = switch (ops) {
-        .mi_s_sib, .mi_u_sib => try self.addExtra(Mir.MemorySib.encode(m)),
-        .mi_s_rip, .mi_u_rip => try self.addExtra(Mir.MemoryRip.encode(m)),
-        else => unreachable,
-    };
-    const data: Mir.Inst.Data = .{
-        .xi = .{ .payload = payload, .imm = switch (imm) {
-            .signed => |x| @bitCast(u32, x),
-            .unsigned => |x| @intCast(u32, x),
-        } },
-    };
+fn asmRegisterMemory(self: *Self, tag: Mir.Inst.Tag, reg: Register, m: Memory) !void {
     _ = try self.addInst(.{
         .tag = tag,
-        .ops = ops,
-        .data = data,
+        .ops = switch (m) {
+            .sib => .rm_sib,
+            .rip => .rm_rip,
+            else => unreachable,
+        },
+        .data = .{ .rx = .{ .r = reg, .payload = switch (m) {
+            .sib => try self.addExtra(Mir.MemorySib.encode(m)),
+            .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
+            else => unreachable,
+        } } },
     });
 }
 
-fn asmRegisterMemory(self: *Self, tag: Mir.Inst.Tag, reg: Register, m: Memory) !void {
-    const ops: Mir.Inst.Ops = switch (m) {
-        .sib => .rm_sib,
-        .rip => .rm_rip,
-        else => unreachable,
-    };
-    const data: Mir.Inst.Data = .{
-        .rx = .{ .r1 = reg, .payload = switch (ops) {
-            .rm_sib => try self.addExtra(Mir.MemorySib.encode(m)),
-            .rm_rip => try self.addExtra(Mir.MemoryRip.encode(m)),
+fn asmMemoryRegister(self: *Self, tag: Mir.Inst.Tag, m: Memory, reg: Register) !void {
+    _ = try self.addInst(.{
+        .tag = tag,
+        .ops = switch (m) {
+            .sib => .mr_sib,
+            .rip => .mr_rip,
             else => unreachable,
-        } },
-    };
+        },
+        .data = .{ .rx = .{ .r = reg, .payload = switch (m) {
+            .sib => try self.addExtra(Mir.MemorySib.encode(m)),
+            .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
+            else => unreachable,
+        } } },
+    });
+}
+
+fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.Tag, m: Memory, imm: Immediate) !void {
     _ = try self.addInst(.{
         .tag = tag,
-        .ops = ops,
-        .data = data,
+        .ops = switch (m) {
+            .sib => switch (imm) {
+                .signed => .mi_sib_s,
+                .unsigned => .mi_sib_u,
+            },
+            .rip => switch (imm) {
+                .signed => .mi_rip_s,
+                .unsigned => .mi_rip_u,
+            },
+            else => unreachable,
+        },
+        .data = .{ .ix = .{ .i = switch (imm) {
+            .signed => |s| @bitCast(u32, s),
+            .unsigned => |u| @intCast(u32, u),
+        }, .payload = switch (m) {
+            .sib => try self.addExtra(Mir.MemorySib.encode(m)),
+            .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
+            else => unreachable,
+        } } },
     });
 }
 
-fn asmMemoryRegister(self: *Self, tag: Mir.Inst.Tag, m: Memory, reg: Register) !void {
-    const ops: Mir.Inst.Ops = switch (m) {
-        .sib => .mr_sib,
-        .rip => .mr_rip,
-        else => unreachable,
-    };
-    const data: Mir.Inst.Data = .{
-        .rx = .{ .r1 = reg, .payload = switch (ops) {
-            .mr_sib => try self.addExtra(Mir.MemorySib.encode(m)),
-            .mr_rip => try self.addExtra(Mir.MemoryRip.encode(m)),
+fn asmMemoryRegisterRegister(
+    self: *Self,
+    tag: Mir.Inst.Tag,
+    m: Memory,
+    reg1: Register,
+    reg2: Register,
+) !void {
+    _ = try self.addInst(.{
+        .tag = tag,
+        .ops = switch (m) {
+            .sib => .mrr_sib,
+            .rip => .mrr_rip,
             else => unreachable,
-        } },
-    };
+        },
+        .data = .{ .rrx = .{ .r1 = reg1, .r2 = reg2, .payload = switch (m) {
+            .sib => try self.addExtra(Mir.MemorySib.encode(m)),
+            .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
+            else => unreachable,
+        } } },
+    });
+}
+
+fn asmMemoryRegisterImmediate(
+    self: *Self,
+    tag: Mir.Inst.Tag,
+    m: Memory,
+    reg: Register,
+    imm: Immediate,
+) !void {
     _ = try self.addInst(.{
         .tag = tag,
-        .ops = ops,
-        .data = data,
+        .ops = switch (m) {
+            .sib => switch (imm) {
+                .signed => .mri_sib_s,
+                .unsigned => .mri_sib_u,
+            },
+            .rip => switch (imm) {
+                .signed => .mri_rip_s,
+                .unsigned => .mri_rip_u,
+            },
+            else => unreachable,
+        },
+        .data = .{ .rix = .{ .r = reg, .i = switch (imm) {
+            .signed => |s| @bitCast(u32, s),
+            .unsigned => |u| @intCast(u32, u),
+        }, .payload = switch (m) {
+            .sib => try self.addExtra(Mir.MemorySib.encode(m)),
+            .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
+            else => unreachable,
+        } } },
     });
 }
 
@@ -768,18 +792,12 @@ fn gen(self: *Self) InnerError!void {
             self.mir_instructions.set(backpatch_stack_sub, .{
                 .tag = .sub,
                 .ops = .ri_u,
-                .data = .{ .ri = .{
-                    .r1 = .rsp,
-                    .imm = aligned_stack_end,
-                } },
+                .data = .{ .ri = .{ .r = .rsp, .i = aligned_stack_end } },
             });
             self.mir_instructions.set(backpatch_stack_add, .{
                 .tag = .add,
                 .ops = .ri_u,
-                .data = .{ .ri = .{
-                    .r1 = .rsp,
-                    .imm = aligned_stack_end,
-                } },
+                .data = .{ .ri = .{ .r = .rsp, .i = aligned_stack_end } },
             });
 
             const save_reg_list = try self.addExtra(Mir.SaveRegisterList{
@@ -1732,6 +1750,7 @@ fn airAddSubShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
                     .add_with_overflow => try self.genBinOp(null, .add, bin_op.lhs, bin_op.rhs),
                     .sub_with_overflow => try self.genBinOp(null, .sub, bin_op.lhs, bin_op.rhs),
                     .shl_with_overflow => blk: {
+                        try self.register_manager.getReg(.rcx, null);
                         const lhs = try self.resolveInst(bin_op.lhs);
                         const rhs = try self.resolveInst(bin_op.rhs);
                         const shift_ty = self.air.typeOf(bin_op.rhs);
@@ -2022,6 +2041,7 @@ fn airShlShrBinOp(self: *Self, inst: Air.Inst.Index) !void {
     try self.spillRegisters(&.{.rcx});
 
     const tag = self.air.instructions.items(.tag)[inst];
+    try self.register_manager.getReg(.rcx, null);
     const lhs = try self.resolveInst(bin_op.lhs);
     const rhs = try self.resolveInst(bin_op.rhs);
     const lhs_ty = self.air.typeOf(bin_op.lhs);
@@ -2151,7 +2171,7 @@ fn airUnwrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void {
                 const result = try self.copyToRegisterWithInstTracking(inst, err_union_ty, operand);
                 if (err_off > 0) {
                     const shift = @intCast(u6, err_off * 8);
-                    try self.genShiftBinOpMir(.shr, err_union_ty, result.register, .{ .immediate = shift });
+                    try self.genShiftBinOpMir(.shr, err_union_ty, result, .{ .immediate = shift });
                 } else {
                     try self.truncateRegister(Type.anyerror, result.register);
                 }
@@ -2183,9 +2203,7 @@ fn genUnwrapErrorUnionPayloadMir(
     const payload_ty = err_union_ty.errorUnionPayload();
 
     const result: MCValue = result: {
-        if (!payload_ty.hasRuntimeBitsIgnoreComptime()) {
-            break :result MCValue.none;
-        }
+        if (!payload_ty.hasRuntimeBitsIgnoreComptime()) break :result .none;
 
         const payload_off = errUnionPayloadOffset(payload_ty, self.target.*);
         switch (err_union) {
@@ -2198,17 +2216,17 @@ fn genUnwrapErrorUnionPayloadMir(
                 const eu_lock = self.register_manager.lockReg(reg);
                 defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
 
-                const result_reg: Register = if (maybe_inst) |inst|
-                    (try self.copyToRegisterWithInstTracking(inst, err_union_ty, err_union)).register
+                const result_mcv: MCValue = if (maybe_inst) |inst|
+                    try self.copyToRegisterWithInstTracking(inst, err_union_ty, err_union)
                 else
-                    try self.copyToTmpRegister(err_union_ty, err_union);
+                    .{ .register = try self.copyToTmpRegister(err_union_ty, err_union) };
                 if (payload_off > 0) {
                     const shift = @intCast(u6, payload_off * 8);
-                    try self.genShiftBinOpMir(.shr, err_union_ty, result_reg, .{ .immediate = shift });
+                    try self.genShiftBinOpMir(.shr, err_union_ty, result_mcv, .{ .immediate = shift });
                 } else {
-                    try self.truncateRegister(payload_ty, result_reg);
+                    try self.truncateRegister(payload_ty, result_mcv.register);
                 }
-                break :result MCValue{ .register = result_reg };
+                break :result result_mcv;
             },
             else => return self.fail("TODO implement genUnwrapErrorUnionPayloadMir for {}", .{err_union}),
         }
@@ -2848,7 +2866,7 @@ fn airGetUnionTag(self: *Self, inst: Air.Inst.Index) !void {
                 else
                     0;
                 const result = try self.copyToRegisterWithInstTracking(inst, union_ty, operand);
-                try self.genShiftBinOpMir(.shr, Type.usize, result.register, .{ .immediate = shift });
+                try self.genShiftBinOpMir(.shr, Type.usize, result, .{ .immediate = shift });
                 break :blk MCValue{
                     .register = registerAlias(result.register, @intCast(u32, layout.tag_size)),
                 };
@@ -3769,12 +3787,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
                 defer if (dst_mcv_lock) |lock| self.register_manager.unlockReg(lock);
 
                 // Shift by struct_field_offset.
-                try self.genShiftBinOpMir(
-                    .shr,
-                    Type.usize,
-                    dst_mcv.register,
-                    .{ .immediate = field_bit_offset },
-                );
+                try self.genShiftBinOpMir(.shr, Type.usize, dst_mcv, .{ .immediate = field_bit_offset });
 
                 // Mask to field_bit_size bits
                 const field_bit_size = field_ty.bitSize(self.target.*);
@@ -3932,29 +3945,186 @@ fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValue
 }
 
 /// Clobbers .rcx for non-immediate shift value.
-fn genShiftBinOpMir(self: *Self, tag: Mir.Inst.Tag, ty: Type, reg: Register, shift: MCValue) !void {
-    switch (tag) {
-        .sal, .sar, .shl, .shr => {},
-        else => unreachable,
-    }
-
-    const abi_size = @intCast(u32, ty.abiSize(self.target.*));
-    blk: {
-        switch (shift) {
+fn genShiftBinOpMir(
+    self: *Self,
+    tag: Mir.Inst.Tag,
+    ty: Type,
+    lhs_mcv: MCValue,
+    shift_mcv: MCValue,
+) !void {
+    const rhs_mcv: MCValue = rhs: {
+        switch (shift_mcv) {
             .immediate => |imm| switch (imm) {
                 0 => return,
-                else => return self.asmRegisterImmediate(tag, registerAlias(reg, abi_size), Immediate.u(imm)),
-            },
-            .register => |shift_reg| {
-                if (shift_reg == .rcx) break :blk;
+                else => break :rhs shift_mcv,
             },
+            .register => |shift_reg| if (shift_reg == .rcx) break :rhs shift_mcv,
             else => {},
         }
         self.register_manager.getRegAssumeFree(.rcx, null);
-        try self.genSetReg(Type.u8, .rcx, shift);
-    }
+        try self.genSetReg(Type.u8, .rcx, shift_mcv);
+        break :rhs .{ .register = .rcx };
+    };
 
-    try self.asmRegisterRegister(tag, registerAlias(reg, abi_size), .cl);
+    const abi_size = @intCast(u32, ty.abiSize(self.target.*));
+    if (abi_size <= 8) {
+        switch (lhs_mcv) {
+            .register => |lhs_reg| switch (rhs_mcv) {
+                .immediate => |rhs_imm| try self.asmRegisterImmediate(
+                    tag,
+                    registerAlias(lhs_reg, abi_size),
+                    Immediate.u(rhs_imm),
+                ),
+                .register => |rhs_reg| try self.asmRegisterRegister(
+                    tag,
+                    registerAlias(lhs_reg, abi_size),
+                    registerAlias(rhs_reg, 1),
+                ),
+                else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
+                    @tagName(lhs_mcv),
+                    @tagName(rhs_mcv),
+                }),
+            },
+            .stack_offset => |lhs_off| switch (rhs_mcv) {
+                .immediate => |rhs_imm| try self.asmMemoryImmediate(
+                    tag,
+                    Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .rbp, .disp = -lhs_off }),
+                    Immediate.u(rhs_imm),
+                ),
+                .register => |rhs_reg| try self.asmMemoryRegister(
+                    tag,
+                    Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .rbp, .disp = -lhs_off }),
+                    registerAlias(rhs_reg, 1),
+                ),
+                else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
+                    @tagName(lhs_mcv),
+                    @tagName(rhs_mcv),
+                }),
+            },
+            else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
+                @tagName(lhs_mcv),
+                @tagName(rhs_mcv),
+            }),
+        }
+    } else if (abi_size <= 16) {
+        const tmp_reg = try self.register_manager.allocReg(null, gp);
+        const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+        defer self.register_manager.unlockReg(tmp_lock);
+
+        const info: struct { offsets: [2]i32, double_tag: Mir.Inst.Tag } = switch (tag) {
+            .shl, .sal => .{ .offsets = .{ 0, 8 }, .double_tag = .shld },
+            .shr, .sar => .{ .offsets = .{ 8, 0 }, .double_tag = .shrd },
+            else => unreachable,
+        };
+        switch (lhs_mcv) {
+            .stack_offset => |dst_off| switch (rhs_mcv) {
+                .immediate => |rhs_imm| if (rhs_imm == 0) {} else if (rhs_imm < 64) {
+                    try self.asmRegisterMemory(
+                        .mov,
+                        tmp_reg,
+                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }),
+                    );
+                    try self.asmMemoryRegisterImmediate(
+                        info.double_tag,
+                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[1] - dst_off }),
+                        tmp_reg,
+                        Immediate.u(rhs_imm),
+                    );
+                    try self.asmMemoryImmediate(
+                        tag,
+                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }),
+                        Immediate.u(rhs_imm),
+                    );
+                } else {
+                    assert(rhs_imm < 128);
+                    try self.asmRegisterMemory(
+                        .mov,
+                        tmp_reg,
+                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }),
+                    );
+                    if (rhs_imm > 64) {
+                        try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(rhs_imm - 64));
+                    }
+                    try self.asmMemoryRegister(
+                        .mov,
+                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[1] - dst_off }),
+                        tmp_reg,
+                    );
+                    switch (tag) {
+                        .shl, .sal, .shr => {
+                            try self.asmRegisterRegister(.xor, tmp_reg.to32(), tmp_reg.to32());
+                            try self.asmMemoryRegister(
+                                .mov,
+                                Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }),
+                                tmp_reg,
+                            );
+                        },
+                        .sar => try self.asmMemoryImmediate(
+                            tag,
+                            Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }),
+                            Immediate.u(63),
+                        ),
+                        else => unreachable,
+                    }
+                },
+                else => {
+                    const first_reg = try self.register_manager.allocReg(null, gp);
+                    const first_lock = self.register_manager.lockRegAssumeUnused(first_reg);
+                    defer self.register_manager.unlockReg(first_lock);
+
+                    const second_reg = try self.register_manager.allocReg(null, gp);
+                    const second_lock = self.register_manager.lockRegAssumeUnused(second_reg);
+                    defer self.register_manager.unlockReg(second_lock);
+
+                    try self.genSetReg(Type.u8, .cl, rhs_mcv);
+                    try self.asmRegisterMemory(
+                        .mov,
+                        first_reg,
+                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }),
+                    );
+                    try self.asmRegisterMemory(
+                        .mov,
+                        second_reg,
+                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[1] - dst_off }),
+                    );
+                    switch (tag) {
+                        .shl, .sal, .shr => try self.asmRegisterRegister(
+                            .xor,
+                            tmp_reg.to32(),
+                            tmp_reg.to32(),
+                        ),
+                        .sar => {
+                            try self.asmRegisterRegister(.mov, tmp_reg, first_reg);
+                            try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(63));
+                        },
+                        else => unreachable,
+                    }
+                    try self.asmRegisterRegisterRegister(info.double_tag, second_reg, first_reg, .cl);
+                    try self.asmRegisterRegister(tag, first_reg, .cl);
+                    try self.asmRegisterImmediate(.cmp, .cl, Immediate.u(64));
+                    try self.asmCmovccRegisterRegister(second_reg, first_reg, .ae);
+                    try self.asmCmovccRegisterRegister(first_reg, tmp_reg, .ae);
+                    try self.asmMemoryRegister(
+                        .mov,
+                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[1] - dst_off }),
+                        second_reg,
+                    );
+                    try self.asmMemoryRegister(
+                        .mov,
+                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }),
+                        first_reg,
+                    );
+                },
+            },
+            else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
+                @tagName(lhs_mcv),
+                @tagName(rhs_mcv),
+            }),
+        }
+    } else return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
+        @tagName(lhs_mcv),
+        @tagName(rhs_mcv),
+    });
 }
 
 /// Result is always a register.
@@ -3964,68 +4134,61 @@ fn genShiftBinOp(
     self: *Self,
     tag: Air.Inst.Tag,
     maybe_inst: ?Air.Inst.Index,
-    lhs: MCValue,
-    rhs: MCValue,
+    lhs_mcv: MCValue,
+    rhs_mcv: MCValue,
     lhs_ty: Type,
     rhs_ty: Type,
 ) !MCValue {
-    if (lhs_ty.zigTypeTag() == .Vector or lhs_ty.zigTypeTag() == .Float) {
+    if (lhs_ty.zigTypeTag() == .Vector) {
         return self.fail("TODO implement genShiftBinOp for {}", .{lhs_ty.fmtDebug()});
     }
-    if (lhs_ty.abiSize(self.target.*) > 8) {
+
+    assert(rhs_ty.abiSize(self.target.*) == 1);
+
+    const lhs_abi_size = lhs_ty.abiSize(self.target.*);
+    if (lhs_abi_size > 16) {
         return self.fail("TODO implement genShiftBinOp for {}", .{lhs_ty.fmtDebug()});
     }
 
-    assert(rhs_ty.abiSize(self.target.*) == 1);
+    self.register_manager.getRegAssumeFree(.rcx, null);
+    const rcx_lock = self.register_manager.lockRegAssumeUnused(.rcx);
+    defer self.register_manager.unlockReg(rcx_lock);
 
-    const lhs_lock: ?RegisterLock = switch (lhs) {
+    const lhs_lock = switch (lhs_mcv) {
         .register => |reg| self.register_manager.lockReg(reg),
         else => null,
     };
     defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
 
-    const rhs_lock: ?RegisterLock = switch (rhs) {
+    const rhs_lock = switch (rhs_mcv) {
         .register => |reg| self.register_manager.lockReg(reg),
         else => null,
     };
     defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
 
-    self.register_manager.getRegAssumeFree(.rcx, null);
-    const rcx_lock = self.register_manager.lockRegAssumeUnused(.rcx);
-    defer self.register_manager.unlockReg(rcx_lock);
-
-    const dst: MCValue = blk: {
+    const dst_mcv: MCValue = dst: {
         if (maybe_inst) |inst| {
             const bin_op = self.air.instructions.items(.data)[inst].bin_op;
-            // TODO dst can also be a memory location
-            if (self.reuseOperand(inst, bin_op.lhs, 0, lhs) and lhs.isRegister()) {
-                break :blk lhs;
-            }
-            break :blk try self.copyToRegisterWithInstTracking(inst, lhs_ty, lhs);
+            if (self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) break :dst lhs_mcv;
         }
-        break :blk MCValue{ .register = try self.copyToTmpRegister(lhs_ty, lhs) };
+        const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true);
+        try self.setRegOrMem(lhs_ty, dst_mcv, lhs_mcv);
+        break :dst dst_mcv;
     };
 
     const signedness = lhs_ty.intInfo(self.target.*).signedness;
-    switch (tag) {
-        .shl => try self.genShiftBinOpMir(switch (signedness) {
+    try self.genShiftBinOpMir(switch (tag) {
+        .shl, .shl_exact => switch (signedness) {
             .signed => .sal,
             .unsigned => .shl,
-        }, lhs_ty, dst.register, rhs),
-
-        .shl_exact => try self.genShiftBinOpMir(.shl, lhs_ty, dst.register, rhs),
-
-        .shr,
-        .shr_exact,
-        => try self.genShiftBinOpMir(switch (signedness) {
+        },
+        .shr, .shr_exact => switch (signedness) {
             .signed => .sar,
             .unsigned => .shr,
-        }, lhs_ty, dst.register, rhs),
-
+        },
         else => unreachable,
-    }
-
-    return dst;
+    }, lhs_ty, dst_mcv, rhs_mcv);
+    return dst_mcv;
 }
 
 /// Result is always a register.
@@ -5552,7 +5715,7 @@ fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, ty: Type, operand: MCValue) !
             const tmp_reg = try self.copyToTmpRegister(ty, operand);
             if (err_off > 0) {
                 const shift = @intCast(u6, err_off * 8);
-                try self.genShiftBinOpMir(.shr, ty, tmp_reg, .{ .immediate = shift });
+                try self.genShiftBinOpMir(.shr, ty, .{ .register = tmp_reg }, .{ .immediate = shift });
             } else {
                 try self.truncateRegister(Type.anyerror, tmp_reg);
             }
@@ -6506,7 +6669,7 @@ fn genInlineMemcpyRegisterRegister(
             }), registerAlias(tmp_reg, nearest_power_of_two));
 
             if (nearest_power_of_two > 1) {
-                try self.genShiftBinOpMir(.shr, ty, tmp_reg, .{
+                try self.genShiftBinOpMir(.shr, ty, .{ .register = tmp_reg }, .{
                     .immediate = nearest_power_of_two * 8,
                 });
             }
@@ -7032,7 +7195,7 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void {
 
     try self.spillEflagsIfOccupied();
     _ = try self.addInst(.{ .tag = .cmpxchg, .ops = .lock_mr_sib, .data = .{ .rx = .{
-        .r1 = new_reg,
+        .r = new_reg,
         .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)),
     } } });
 
@@ -7110,7 +7273,7 @@ fn atomicOp(
             .xadd, .add, .sub, .@"and", .@"or", .xor => .lock_mr_sib,
             else => unreachable,
         }, .data = .{ .rx = .{
-            .r1 = registerAlias(dst_reg, val_abi_size),
+            .r = registerAlias(dst_reg, val_abi_size),
             .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)),
         } } });
         return;
@@ -7702,8 +7865,8 @@ fn truncateRegister(self: *Self, ty: Type, reg: Register) !void {
     switch (int_info.signedness) {
         .signed => {
             const shift = @intCast(u6, max_reg_bit_width - int_info.bits);
-            try self.genShiftBinOpMir(.sal, Type.isize, reg, .{ .immediate = shift });
-            try self.genShiftBinOpMir(.sar, Type.isize, reg, .{ .immediate = shift });
+            try self.genShiftBinOpMir(.sal, Type.isize, .{ .register = reg }, .{ .immediate = shift });
+            try self.genShiftBinOpMir(.sar, Type.isize, .{ .register = reg }, .{ .immediate = shift });
         },
         .unsigned => {
             const shift = @intCast(u6, max_reg_bit_width - int_info.bits);
src/arch/x86_64/Emit.zig
@@ -121,7 +121,9 @@ pub fn lowerMir(emit: *Emit) InnerError!void {
             .sbb,
             .sfence,
             .shl,
+            .shld,
             .shr,
+            .shrd,
             .sub,
             .syscall,
             .@"test",
@@ -231,10 +233,10 @@ fn mirEncodeGeneric(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerE
     const prefix: Instruction.Prefix = switch (ops) {
         .lock_m_sib,
         .lock_m_rip,
-        .lock_mi_u_sib,
-        .lock_mi_u_rip,
-        .lock_mi_s_sib,
-        .lock_mi_s_rip,
+        .lock_mi_sib_u,
+        .lock_mi_rip_u,
+        .lock_mi_sib_s,
+        .lock_mi_rip_s,
         .lock_mr_sib,
         .lock_mr_rip,
         .lock_moffs_rax,
@@ -249,31 +251,36 @@ fn mirEncodeGeneric(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerE
 
     switch (ops) {
         .none => {},
-        .imm_s => op1 = .{ .imm = Immediate.s(@bitCast(i32, data.imm)) },
-        .imm_u => op1 = .{ .imm = Immediate.u(data.imm) },
+        .i_s => op1 = .{ .imm = Immediate.s(@bitCast(i32, data.i)) },
+        .i_u => op1 = .{ .imm = Immediate.u(data.i) },
         .r => op1 = .{ .reg = data.r },
         .rr => {
             op1 = .{ .reg = data.rr.r1 };
             op2 = .{ .reg = data.rr.r2 };
         },
+        .rrr => {
+            op1 = .{ .reg = data.rrr.r1 };
+            op2 = .{ .reg = data.rrr.r2 };
+            op3 = .{ .reg = data.rrr.r3 };
+        },
         .ri_s, .ri_u => {
             const imm = switch (ops) {
-                .ri_s => Immediate.s(@bitCast(i32, data.ri.imm)),
-                .ri_u => Immediate.u(data.ri.imm),
+                .ri_s => Immediate.s(@bitCast(i32, data.ri.i)),
+                .ri_u => Immediate.u(data.ri.i),
                 else => unreachable,
             };
-            op1 = .{ .reg = data.ri.r1 };
+            op1 = .{ .reg = data.ri.r };
             op2 = .{ .imm = imm };
         },
         .ri64 => {
             const imm64 = emit.mir.extraData(Mir.Imm64, data.rx.payload).data;
-            op1 = .{ .reg = data.rx.r1 };
+            op1 = .{ .reg = data.rx.r };
             op2 = .{ .imm = Immediate.u(Mir.Imm64.decode(imm64)) };
         },
         .rri_s, .rri_u => {
             const imm = switch (ops) {
-                .rri_s => Immediate.s(@bitCast(i32, data.rri.imm)),
-                .rri_u => Immediate.u(data.rri.imm),
+                .rri_s => Immediate.s(@bitCast(i32, data.rri.i)),
+                .rri_u => Immediate.u(data.rri.i),
                 else => unreachable,
             };
             op1 = .{ .reg = data.rri.r1 };
@@ -288,21 +295,21 @@ fn mirEncodeGeneric(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerE
             const mrip = emit.mir.extraData(Mir.MemoryRip, data.payload).data;
             op1 = .{ .mem = Mir.MemoryRip.decode(mrip) };
         },
-        .mi_s_sib, .mi_u_sib, .lock_mi_s_sib, .lock_mi_u_sib => {
-            const msib = emit.mir.extraData(Mir.MemorySib, data.xi.payload).data;
+        .mi_sib_s, .mi_sib_u, .lock_mi_sib_s, .lock_mi_sib_u => {
+            const msib = emit.mir.extraData(Mir.MemorySib, data.ix.payload).data;
             const imm = switch (ops) {
-                .mi_s_sib, .lock_mi_s_sib => Immediate.s(@bitCast(i32, data.xi.imm)),
-                .mi_u_sib, .lock_mi_u_sib => Immediate.u(data.xi.imm),
+                .mi_sib_s, .lock_mi_sib_s => Immediate.s(@bitCast(i32, data.ix.i)),
+                .mi_sib_u, .lock_mi_sib_u => Immediate.u(data.ix.i),
                 else => unreachable,
             };
             op1 = .{ .mem = Mir.MemorySib.decode(msib) };
             op2 = .{ .imm = imm };
         },
-        .mi_u_rip, .mi_s_rip, .lock_mi_u_rip, .lock_mi_s_rip => {
-            const mrip = emit.mir.extraData(Mir.MemoryRip, data.xi.payload).data;
+        .mi_rip_u, .mi_rip_s, .lock_mi_rip_u, .lock_mi_rip_s => {
+            const mrip = emit.mir.extraData(Mir.MemoryRip, data.ix.payload).data;
             const imm = switch (ops) {
-                .mi_s_rip, .lock_mi_s_rip => Immediate.s(@bitCast(i32, data.xi.imm)),
-                .mi_u_rip, .lock_mi_u_rip => Immediate.u(data.xi.imm),
+                .mi_rip_s, .lock_mi_rip_s => Immediate.s(@bitCast(i32, data.ix.i)),
+                .mi_rip_u, .lock_mi_rip_u => Immediate.u(data.ix.i),
                 else => unreachable,
             };
             op1 = .{ .mem = Mir.MemoryRip.decode(mrip) };
@@ -310,7 +317,7 @@ fn mirEncodeGeneric(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerE
         },
         .rm_sib, .mr_sib, .lock_mr_sib => {
             const msib = emit.mir.extraData(Mir.MemorySib, data.rx.payload).data;
-            const op_r = .{ .reg = data.rx.r1 };
+            const op_r = .{ .reg = data.rx.r };
             const op_m = .{ .mem = Mir.MemorySib.decode(msib) };
             switch (ops) {
                 .rm_sib => {
@@ -326,7 +333,7 @@ fn mirEncodeGeneric(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerE
         },
         .rm_rip, .mr_rip, .lock_mr_rip => {
             const mrip = emit.mir.extraData(Mir.MemoryRip, data.rx.payload).data;
-            const op_r = .{ .reg = data.rx.r1 };
+            const op_r = .{ .reg = data.rx.r };
             const op_m = .{ .mem = Mir.MemoryRip.decode(mrip) };
             switch (ops) {
                 .rm_rip => {
@@ -340,6 +347,40 @@ fn mirEncodeGeneric(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerE
                 else => unreachable,
             }
         },
+        .mrr_sib => { // memory (SIB), register, register operands
+            const msib = emit.mir.extraData(Mir.MemorySib, data.rrx.payload).data;
+            op1 = .{ .mem = Mir.MemorySib.decode(msib) };
+            op2 = .{ .reg = data.rrx.r1 };
+            op3 = .{ .reg = data.rrx.r2 }; // second register is the third operand; op2 keeps r1
+        },
+        .mrr_rip => { // memory (RIP), register, register operands
+            const mrip = emit.mir.extraData(Mir.MemoryRip, data.rrx.payload).data;
+            op1 = .{ .mem = Mir.MemoryRip.decode(mrip) };
+            op2 = .{ .reg = data.rrx.r1 };
+            op3 = .{ .reg = data.rrx.r2 }; // second register is the third operand; op2 keeps r1
+        },
+        .mri_sib_u, .mri_sib_s => { // memory (SIB), register, immediate operands
+            const msib = emit.mir.extraData(Mir.MemorySib, data.rix.payload).data;
+            const imm = switch (ops) {
+                .mri_sib_s => Immediate.s(@bitCast(i32, data.rix.i)), // reinterpret the stored u32 bits as signed
+                .mri_sib_u => Immediate.u(data.rix.i),
+                else => unreachable, // the enclosing arm only admits the two mri_sib_* tags
+            };
+            op1 = .{ .mem = Mir.MemorySib.decode(msib) };
+            op2 = .{ .reg = data.rix.r };
+            op3 = .{ .imm = imm };
+        },
+        .mri_rip_u, .mri_rip_s => { // memory (RIP), register, immediate operands
+            const mrip = emit.mir.extraData(Mir.MemoryRip, data.rix.payload).data;
+            const imm = switch (ops) {
+                .mri_rip_s => Immediate.s(@bitCast(i32, data.rix.i)), // reinterpret the stored u32 bits as signed
+                .mri_rip_u => Immediate.u(data.rix.i),
+                else => unreachable, // the enclosing arm only admits the two mri_rip_* tags
+            };
+            op1 = .{ .mem = Mir.MemoryRip.decode(mrip) };
+            op2 = .{ .reg = data.rix.r };
+            op3 = .{ .imm = imm };
+        },
         else => return emit.fail("TODO handle generic encoding: {s}, {s}", .{
             @tagName(mnemonic),
             @tagName(ops),
@@ -451,12 +492,12 @@ fn mirMovsx(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
         },
         .rm_sib => {
             const msib = emit.mir.extraData(Mir.MemorySib, data.rx.payload).data;
-            op1 = .{ .reg = data.rx.r1 };
+            op1 = .{ .reg = data.rx.r };
             op2 = .{ .mem = Mir.MemorySib.decode(msib) };
         },
         .rm_rip => {
             const mrip = emit.mir.extraData(Mir.MemoryRip, data.rx.payload).data;
-            op1 = .{ .reg = data.rx.r1 };
+            op1 = .{ .reg = data.rx.r };
             op2 = .{ .mem = Mir.MemoryRip.decode(mrip) };
         },
         else => unreachable, // TODO
@@ -495,7 +536,7 @@ fn mirCmovcc(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
             const extra = emit.mir.extraData(Mir.MemorySib, data.payload).data;
             const mnemonic = mnemonicFromConditionCode("cmov", data.cc);
             return emit.encode(mnemonic, .{
-                .op1 = .{ .reg = data.r1 },
+                .op1 = .{ .reg = data.r },
                 .op2 = .{ .mem = Mir.MemorySib.decode(extra) },
             });
         },
@@ -504,7 +545,7 @@ fn mirCmovcc(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
             const extra = emit.mir.extraData(Mir.MemoryRip, data.payload).data;
             const mnemonic = mnemonicFromConditionCode("cmov", data.cc);
             return emit.encode(mnemonic, .{
-                .op1 = .{ .reg = data.r1 },
+                .op1 = .{ .reg = data.r },
                 .op2 = .{ .mem = Mir.MemoryRip.decode(extra) },
             });
         },
@@ -519,7 +560,7 @@ fn mirSetcc(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
             const data = emit.mir.instructions.items(.data)[inst].r_cc;
             const mnemonic = mnemonicFromConditionCode("set", data.cc);
             return emit.encode(mnemonic, .{
-                .op1 = .{ .reg = data.r1 },
+                .op1 = .{ .reg = data.r },
             });
         },
         .m_sib_cc => {
src/arch/x86_64/encoder.zig
@@ -174,7 +174,7 @@ pub const Instruction = struct {
             .td => try encoder.imm64(inst.op1.mem.moffs.offset),
             else => {
                 const mem_op = switch (encoding.op_en) {
-                    .m, .mi, .m1, .mc, .mr => inst.op1,
+                    .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.op1,
                     .rm, .rmi => inst.op2,
                     else => unreachable,
                 };
@@ -182,7 +182,7 @@ pub const Instruction = struct {
                     .reg => |reg| {
                         const rm = switch (encoding.op_en) {
                             .m, .mi, .m1, .mc => encoding.modRmExt(),
-                            .mr => inst.op2.reg.lowEnc(),
+                            .mr, .mri, .mrc => inst.op2.reg.lowEnc(),
                             .rm, .rmi => inst.op1.reg.lowEnc(),
                             else => unreachable,
                         };
@@ -191,7 +191,7 @@ pub const Instruction = struct {
                     .mem => |mem| {
                         const op = switch (encoding.op_en) {
                             .m, .mi, .m1, .mc => .none,
-                            .mr => inst.op2,
+                            .mr, .mri, .mrc => inst.op2,
                             .rm, .rmi => inst.op1,
                             else => unreachable,
                         };
@@ -202,7 +202,7 @@ pub const Instruction = struct {
 
                 switch (encoding.op_en) {
                     .mi => try encodeImm(inst.op2.imm, encoding.op2, encoder),
-                    .rmi => try encodeImm(inst.op3.imm, encoding.op3, encoder),
+                    .rmi, .mri => try encodeImm(inst.op3.imm, encoding.op3, encoder),
                     else => {},
                 }
             },
@@ -251,7 +251,7 @@ pub const Instruction = struct {
                     else => unreachable,
                 };
             } else null,
-            .m, .mi, .m1, .mc, .mr => if (inst.op1.isSegmentRegister()) blk: {
+            .m, .mi, .m1, .mc, .mr, .mri, .mrc => if (inst.op1.isSegmentRegister()) blk: {
                 break :blk switch (inst.op1) {
                     .reg => |r| r,
                     .mem => |m| m.base().?,
@@ -275,13 +275,11 @@ pub const Instruction = struct {
 
         switch (op_en) {
             .np, .i, .zi, .fd, .td, .d => {},
-            .o, .oi => {
-                rex.b = inst.op1.reg.isExtended();
-            },
-            .m, .mi, .m1, .mc, .mr, .rm, .rmi => {
+            .o, .oi => rex.b = inst.op1.reg.isExtended(),
+            .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc => {
                 const r_op = switch (op_en) {
                     .rm, .rmi => inst.op1,
-                    .mr => inst.op2,
+                    .mr, .mri, .mrc => inst.op2,
                     else => null,
                 };
                 if (r_op) |op| {
@@ -290,7 +288,7 @@ pub const Instruction = struct {
 
                 const b_x_op = switch (op_en) {
                     .rm, .rmi => inst.op2,
-                    .m, .mi, .m1, .mc, .mr => inst.op1,
+                    .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.op1,
                     else => unreachable,
                 };
                 switch (b_x_op) {
src/arch/x86_64/Encoding.zig
@@ -262,15 +262,15 @@ pub fn format(
             try writer.print("+{s} ", .{tag});
         },
         .m, .mi, .m1, .mc => try writer.print("/{d} ", .{encoding.modRmExt()}),
-        .mr, .rm, .rmi => try writer.writeAll("/r "),
+        .mr, .rm, .rmi, .mri, .mrc => try writer.writeAll("/r "),
     }
 
     switch (encoding.op_en) {
-        .i, .d, .zi, .oi, .mi, .rmi => {
+        .i, .d, .zi, .oi, .mi, .rmi, .mri => {
             const op = switch (encoding.op_en) {
                 .i, .d => encoding.op1,
                 .zi, .oi, .mi => encoding.op2,
-                .rmi => encoding.op3,
+                .rmi, .mri => encoding.op3,
                 else => unreachable,
             };
             const tag = switch (op) {
@@ -285,7 +285,7 @@ pub fn format(
             };
             try writer.print("{s} ", .{tag});
         },
-        .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm => {},
+        .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc => {},
     }
 
     try writer.print("{s} ", .{@tagName(encoding.mnemonic)});
@@ -334,7 +334,7 @@ pub const Mnemonic = enum {
     rcl, rcr, ret, rol, ror,
     sal, sar, sbb,
     scas, scasb, scasd, scasq, scasw,
-    shl, shr, sub, syscall,
+    shl, shld, shr, shrd, sub, syscall,
     seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae,
     setnb, setnbe, setnc, setne, setng, setnge, setnl, setnle, setno, setnp, setns,
     setnz, seto, setp, setpe, setpo, sets, setz,
@@ -374,7 +374,8 @@ pub const OpEn = enum {
     i, zi,
     d, m,
     fd, td,
-    m1, mc, mi, mr, rm, rmi,
+    m1, mc, mi, mr, rm,
+    rmi, mri, mrc,
     // zig fmt: on
 };
 
src/arch/x86_64/encodings.zig
@@ -693,6 +693,13 @@ pub const table = &[_]Entry{
     .{ .shl, .mi, .rm32, .imm8,  .none, .none, &.{ 0xc1 }, 4, .none  },
     .{ .shl, .mi, .rm64, .imm8,  .none, .none, &.{ 0xc1 }, 4, .long  },
 
+    .{ .shld, .mri, .rm16, .r16, .imm8, .none, &.{ 0x0f, 0xa4 }, 0, .none },
+    .{ .shld, .mrc, .rm16, .r16, .cl,   .none, &.{ 0x0f, 0xa5 }, 0, .none },
+    .{ .shld, .mri, .rm32, .r32, .imm8, .none, &.{ 0x0f, 0xa4 }, 0, .none },
+    .{ .shld, .mri, .rm64, .r64, .imm8, .none, &.{ 0x0f, 0xa4 }, 0, .long },
+    .{ .shld, .mrc, .rm32, .r32, .cl,   .none, &.{ 0x0f, 0xa5 }, 0, .none },
+    .{ .shld, .mrc, .rm64, .r64, .cl,   .none, &.{ 0x0f, 0xa5 }, 0, .long },
+
     .{ .shr, .m1, .rm8,  .unity, .none, .none, &.{ 0xd0 }, 5, .none  },
     .{ .shr, .m1, .rm8,  .unity, .none, .none, &.{ 0xd0 }, 5, .rex   },
     .{ .shr, .m1, .rm16, .unity, .none, .none, &.{ 0xd1 }, 5, .none  },
@@ -709,6 +716,13 @@ pub const table = &[_]Entry{
     .{ .shr, .mi, .rm32, .imm8,  .none, .none, &.{ 0xc1 }, 5, .none  },
     .{ .shr, .mi, .rm64, .imm8,  .none, .none, &.{ 0xc1 }, 5, .long  },
 
+    .{ .shrd, .mri, .rm16, .r16, .imm8, .none, &.{ 0x0f, 0xac }, 0, .none },
+    .{ .shrd, .mrc, .rm16, .r16, .cl,   .none, &.{ 0x0f, 0xad }, 0, .none },
+    .{ .shrd, .mri, .rm32, .r32, .imm8, .none, &.{ 0x0f, 0xac }, 0, .none },
+    .{ .shrd, .mri, .rm64, .r64, .imm8, .none, &.{ 0x0f, 0xac }, 0, .long },
+    .{ .shrd, .mrc, .rm32, .r32, .cl,   .none, &.{ 0x0f, 0xad }, 0, .none },
+    .{ .shrd, .mrc, .rm64, .r64, .cl,   .none, &.{ 0x0f, 0xad }, 0, .long },
+
     .{ .stos,  .np, .m8,   .none, .none, .none, &.{ 0xaa }, 0, .none  },
     .{ .stos,  .np, .m16,  .none, .none, .none, &.{ 0xab }, 0, .none  },
     .{ .stos,  .np, .m32,  .none, .none, .none, &.{ 0xab }, 0, .none  },
src/arch/x86_64/Mir.zig
@@ -138,8 +138,12 @@ pub const Inst = struct {
         sfence,
         /// Logical shift left
         shl,
+        /// Double precision shift left
+        shld,
         /// Logical shift right
         shr,
+        /// Double precision shift right
+        shrd,
         /// Subtract
         sub,
         /// Syscall
@@ -284,10 +288,10 @@ pub const Inst = struct {
         ri64,
         /// Immediate (sign-extended) operand.
-        /// Uses `imm` payload.
+        /// Uses `i` payload.
-        imm_s,
+        i_s,
         /// Immediate (unsigned) operand.
-        /// Uses `imm` payload.
+        /// Uses `i` payload.
-        imm_u,
+        i_u,
         /// Relative displacement operand.
         /// Uses `imm` payload.
         rel,
@@ -316,23 +320,41 @@ pub const Inst = struct {
         /// Uses `x_cc` with extra data of type `MemoryRip`.
         m_rip_cc,
         /// Memory (SIB), immediate (unsigned) operands.
-        /// Uses `xi` payload with extra data of type `MemorySib`.
-        mi_u_sib,
+        /// Uses `ix` payload with extra data of type `MemorySib`.
+        mi_sib_u,
         /// Memory (RIP), immediate (unsigned) operands.
-        /// Uses `xi` payload with extra data of type `MemoryRip`.
-        mi_u_rip,
+        /// Uses `ix` payload with extra data of type `MemoryRip`.
+        mi_rip_u,
         /// Memory (SIB), immediate (sign-extend) operands.
-        /// Uses `xi` payload with extra data of type `MemorySib`.
-        mi_s_sib,
+        /// Uses `ix` payload with extra data of type `MemorySib`.
+        mi_sib_s,
         /// Memory (RIP), immediate (sign-extend) operands.
-        /// Uses `xi` payload with extra data of type `MemoryRip`.
-        mi_s_rip,
+        /// Uses `ix` payload with extra data of type `MemoryRip`.
+        mi_rip_s,
         /// Memory (SIB), register operands.
         /// Uses `rx` payload with extra data of type `MemorySib`.
         mr_sib,
         /// Memory (RIP), register operands.
         /// Uses `rx` payload with extra data of type `MemoryRip`.
         mr_rip,
+        /// Memory (SIB), register, register operands.
+        /// Uses `rrx` payload with extra data of type `MemorySib`.
+        mrr_sib,
+        /// Memory (RIP), register, register operands.
+        /// Uses `rrx` payload with extra data of type `MemoryRip`.
+        mrr_rip,
+        /// Memory (SIB), register, immediate (unsigned) operands.
+        /// Uses `rix` payload with extra data of type `MemorySib`.
+        mri_sib_u,
+        /// Memory (RIP), register, immediate (unsigned) operands.
+        /// Uses `rix` payload with extra data of type `MemoryRip`.
+        mri_rip_u,
+        /// Memory (SIB), register, immediate (signed) operands.
+        /// Uses `rix` payload with extra data of type `MemorySib`.
+        mri_sib_s,
+        /// Memory (RIP), register, immediate (signed) operands.
+        /// Uses `rix` payload with extra data of type `MemoryRip`.
+        mri_rip_s,
         /// Rax, Memory moffs.
         /// Uses `payload` with extra data of type `MemoryMoffs`.
         rax_moffs,
@@ -347,16 +369,16 @@ pub const Inst = struct {
         lock_m_rip,
         /// Memory (SIB), immediate (unsigned) operands with lock prefix.
-        /// Uses `xi` payload with extra data of type `MemorySib`.
+        /// Uses `ix` payload with extra data of type `MemorySib`.
-        lock_mi_u_sib,
+        lock_mi_sib_u,
         /// Memory (RIP), immediate (unsigned) operands with lock prefix.
-        /// Uses `xi` payload with extra data of type `MemoryRip`.
+        /// Uses `ix` payload with extra data of type `MemoryRip`.
-        lock_mi_u_rip,
+        lock_mi_rip_u,
         /// Memory (SIB), immediate (sign-extend) operands with lock prefix.
-        /// Uses `xi` payload with extra data of type `MemorySib`.
+        /// Uses `ix` payload with extra data of type `MemorySib`.
-        lock_mi_s_sib,
+        lock_mi_sib_s,
         /// Memory (RIP), immediate (sign-extend) operands with lock prefix.
-        /// Uses `xi` payload with extra data of type `MemoryRip`.
+        /// Uses `ix` payload with extra data of type `MemoryRip`.
-        lock_mi_s_rip,
+        lock_mi_rip_s,
         /// Memory (SIB), register operands with lock prefix.
         /// Uses `rx` payload with extra data of type `MemorySib`.
         lock_mr_sib,
@@ -400,7 +422,7 @@ pub const Inst = struct {
             cc: bits.Condition,
         },
         /// A 32-bit immediate value.
-        imm: u32,
+        i: u32,
         r: Register,
         rr: struct {
             r1: Register,
@@ -414,16 +436,16 @@ pub const Inst = struct {
         rri: struct {
             r1: Register,
             r2: Register,
-            imm: u32,
+            i: u32,
         },
         /// Condition code (CC), followed by custom payload found in extra.
         x_cc: struct {
-            payload: u32,
             cc: bits.Condition,
+            payload: u32,
         },
         /// Register with condition code (CC).
         r_cc: struct {
-            r1: Register,
+            r: Register,
             cc: bits.Condition,
         },
         /// Register, register with condition code (CC).
@@ -434,24 +456,36 @@ pub const Inst = struct {
         },
         /// Register, immediate.
         ri: struct {
-            r1: Register,
-            imm: u32,
+            r: Register,
+            i: u32,
         },
         /// Register, followed by custom payload found in extra.
         rx: struct {
-            r1: Register,
+            r: Register,
             payload: u32,
         },
         /// Register with condition code (CC), followed by custom payload found in extra.
         rx_cc: struct {
-            r1: Register,
+            r: Register,
             cc: bits.Condition,
             payload: u32,
         },
-        /// Custom payload followed by an immediate.
-        xi: struct {
+        /// Immediate, followed by Custom payload found in extra.
+        ix: struct {
+            i: u32,
+            payload: u32,
+        },
+        /// Register, register, followed by Custom payload found in extra.
+        rrx: struct {
+            r1: Register,
+            r2: Register,
+            payload: u32,
+        },
+        /// Register, immediate, followed by Custom payload found in extra.
+        rix: struct {
+            r: Register,
+            i: u32,
             payload: u32,
-            imm: u32,
         },
         /// String instruction prefix and width.
         string: struct {