Commit 8ee80d61f6

Jacob Young <jacobly0@users.noreply.github.com>
2025-01-18 04:38:21
x86_64: add a bunch of instruction encodings
Closes #19773
1 parent db8ed73
lib/std/zig/system/x86.zig
@@ -419,6 +419,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
         // detecting features using the "-march=native" flag.
         // For more info, see X86 ISA docs.
         setFeature(cpu, .pconfig, bit(leaf.edx, 18));
+        setFeature(cpu, .uintr, bit(leaf.edx, 5));
 
         // TODO I feel unsure about this check.
         //      It doesn't really seem to check for 7.1, just for 7.
src/arch/x86_64/bits.zig
@@ -177,7 +177,7 @@ pub const Condition = enum(u5) {
     }
 };
 
-pub const Register = enum(u7) {
+pub const Register = enum(u8) {
     // zig fmt: off
     rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
     r8, r9, r10, r11, r12, r13, r14, r15,
@@ -207,6 +207,12 @@ pub const Register = enum(u7) {
 
     rip, eip, ip,
 
+    cr0, cr1, cr2,  cr3,  cr4,  cr5,  cr6,  cr7,
+    cr8, cr9, cr10, cr11, cr12, cr13, cr14, cr15,
+
+    dr0, dr1, dr2,  dr3,  dr4,  dr5,  dr6,  dr7,
+    dr8, dr9, dr10, dr11, dr12, dr13, dr14, dr15,
+
     none,
     // zig fmt: on
 
@@ -217,6 +223,8 @@ pub const Register = enum(u7) {
         mmx,
         sse,
         ip,
+        cr,
+        dr,
     };
 
     pub fn class(reg: Register) Class {
@@ -235,13 +243,15 @@ pub const Register = enum(u7) {
 
             @intFromEnum(Register.es)   ... @intFromEnum(Register.gs)    => .segment,
             @intFromEnum(Register.rip)  ... @intFromEnum(Register.ip)    => .ip,
+            @intFromEnum(Register.cr0)  ... @intFromEnum(Register.cr15)  => .cr,
+            @intFromEnum(Register.dr0)  ... @intFromEnum(Register.dr15)  => .dr,
 
             else => unreachable,
             // zig fmt: on
         };
     }
 
-    pub fn id(reg: Register) u6 {
+    pub fn id(reg: Register) u7 {
         const base = switch (@intFromEnum(reg)) {
             // zig fmt: off
             @intFromEnum(Register.rax)  ... @intFromEnum(Register.r15)   => @intFromEnum(Register.rax),
@@ -254,8 +264,9 @@ pub const Register = enum(u7) {
             @intFromEnum(Register.xmm0) ... @intFromEnum(Register.xmm15) => @intFromEnum(Register.xmm0) - 16,
             @intFromEnum(Register.mm0)  ... @intFromEnum(Register.mm7)   => @intFromEnum(Register.mm0) - 32,
             @intFromEnum(Register.st0)  ... @intFromEnum(Register.st7)   => @intFromEnum(Register.st0) - 40,
-
             @intFromEnum(Register.es)   ... @intFromEnum(Register.gs)    => @intFromEnum(Register.es) - 48,
+            @intFromEnum(Register.cr0)  ... @intFromEnum(Register.cr15)  => @intFromEnum(Register.cr0) - 54,
+            @intFromEnum(Register.dr0)  ... @intFromEnum(Register.dr15)  => @intFromEnum(Register.dr0) - 70,
 
             else => unreachable,
             // zig fmt: on
@@ -279,6 +290,9 @@ pub const Register = enum(u7) {
 
             @intFromEnum(Register.es)   ... @intFromEnum(Register.gs)    => 16,
 
+            @intFromEnum(Register.cr0)  ... @intFromEnum(Register.cr15)  => 64,
+            @intFromEnum(Register.dr0)  ... @intFromEnum(Register.dr15)  => 64,
+
             else => unreachable,
             // zig fmt: on
         };
@@ -295,6 +309,9 @@ pub const Register = enum(u7) {
             @intFromEnum(Register.ymm8) ... @intFromEnum(Register.ymm15) => true,
             @intFromEnum(Register.xmm8) ... @intFromEnum(Register.xmm15) => true,
 
+            @intFromEnum(Register.cr8)  ... @intFromEnum(Register.cr15)  => true,
+            @intFromEnum(Register.dr8)  ... @intFromEnum(Register.dr15)  => true,
+
             else => false,
             // zig fmt: on
         };
@@ -316,6 +333,9 @@ pub const Register = enum(u7) {
 
             @intFromEnum(Register.es)   ... @intFromEnum(Register.gs)    => @intFromEnum(Register.es),
 
+            @intFromEnum(Register.cr0)  ... @intFromEnum(Register.cr15)  => @intFromEnum(Register.cr0),
+            @intFromEnum(Register.dr0)  ... @intFromEnum(Register.dr15)  => @intFromEnum(Register.dr0),
+
             else => unreachable,
             // zig fmt: on
         };
@@ -397,6 +417,7 @@ pub const Register = enum(u7) {
             .mmx => 41 + @as(u6, reg.enc()),
             .segment => 50 + @as(u6, reg.enc()),
             .ip => 16,
+            .cr, .dr => unreachable,
         };
     }
 };
src/arch/x86_64/CodeGen.zig
@@ -1375,6 +1375,14 @@ fn asmOps(self: *CodeGen, tag: Mir.Inst.FixedTag, ops: [4]Operand) !void {
         },
         .imm => |imm0| switch (ops[1]) {
             .none => self.asmImmediate(tag, imm0),
+            .reg => |reg1| switch (ops[2]) {
+                .none => self.asmImmediateRegister(tag, imm0, reg1),
+                else => error.InvalidInstruction,
+            },
+            .imm => |imm1| switch (ops[2]) {
+                .none => self.asmImmediateImmediate(tag, imm0, imm1),
+                else => error.InvalidInstruction,
+            },
             else => error.InvalidInstruction,
         },
         .inst => |inst0| switch (ops[1]) {
@@ -1491,9 +1499,10 @@ fn asmSetccMemory(self: *CodeGen, cc: Condition, m: Memory) !void {
 
 fn asmJmpReloc(self: *CodeGen, target: Mir.Inst.Index) !Mir.Inst.Index {
     return self.addInst(.{
-        .tag = .jmp,
+        .tag = .j,
         .ops = .inst,
         .data = .{ .inst = .{
+            .fixes = ._mp,
             .inst = target,
         } },
     });
@@ -1753,6 +1762,42 @@ fn asmImmediate(self: *CodeGen, tag: Mir.Inst.FixedTag, imm: Immediate) !void {
     });
 }
 
+fn asmImmediateRegister(self: *CodeGen, tag: Mir.Inst.FixedTag, imm: Immediate, reg: Register) !void { // Appends one MIR instruction whose operands are an immediate followed by a register.
+    _ = try self.addInst(.{
+        .tag = tag[1], // second element of the FixedTag pair is the instruction tag
+        .ops = .ir,
+        .data = .{ .ri = .{
+            .fixes = tag[0], // first element of the FixedTag pair is the fixes
+            .r1 = reg,
+            .i = @as(u8, switch (imm) { // the immediate is narrowed to a single byte here
+                .signed => |s| @bitCast(@as(i8, @intCast(s))), // value must fit in i8; its bit pattern is stored unchanged
+                .unsigned => |u| @intCast(u), // value must fit in u8
+                .reloc => unreachable, // callers must not pass a relocation immediate
+            }),
+        } },
+    });
+}
+
+fn asmImmediateImmediate(self: *CodeGen, tag: Mir.Inst.FixedTag, imm1: Immediate, imm2: Immediate) !void { // Appends one MIR instruction whose operands are two immediates.
+    _ = try self.addInst(.{
+        .tag = tag[1], // second element of the FixedTag pair is the instruction tag
+        .ops = .ii,
+        .data = .{ .ii = .{
+            .fixes = tag[0], // first element of the FixedTag pair is the fixes
+            .i1 = switch (imm1) { // first immediate: the signed path goes through i16, so this field is 16 bits wide
+                .signed => |s| @bitCast(@as(i16, @intCast(s))), // value must fit in i16; its bit pattern is stored unchanged
+                .unsigned => |u| @intCast(u), // value must fit in the field's integer type
+                .reloc => unreachable, // callers must not pass a relocation immediate
+            },
+            .i2 = switch (imm2) { // second immediate: the signed path goes through i8, so this field is 8 bits wide
+                .signed => |s| @bitCast(@as(i8, @intCast(s))), // value must fit in i8; its bit pattern is stored unchanged
+                .unsigned => |u| @intCast(u), // value must fit in the field's integer type
+                .reloc => unreachable, // callers must not pass a relocation immediate
+            },
+        } },
+    });
+}
+
 fn asmRegisterRegister(self: *CodeGen, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register) !void {
     _ = try self.addInst(.{
         .tag = tag[1],
@@ -4188,8 +4233,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 _ = try cg.asmJmpReloc(loop.target);
             },
             .br => try cg.airBr(inst),
-            .trap => try cg.asmOpOnly(.{ ._, .ud2 }),
-            .breakpoint => try cg.asmOpOnly(.{ ._, .int3 }),
+            .trap => try cg.asmOpOnly(.{ ._2, .ud }),
+            .breakpoint => try cg.asmOpOnly(.{ ._3, .int }),
             .ret_addr => if (use_old) try cg.airRetAddr(inst) else {
                 var slot = try cg.tempInit(.usize, .{ .load_frame = .{
                     .index = .ret_addr,
@@ -4233,7 +4278,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{.{ .ref = .src0 }},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .inc, .dst0b, ._, ._, ._ },
+                        .{ ._, ._c, .in, .dst0b, ._, ._, ._ },
                     } },
                 }, .{
                     .src_constraints = .{ .{ .exact_unsigned_int = 1 }, .any },
@@ -5643,7 +5688,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ },
                         .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ },
                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
-                        .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
@@ -5695,7 +5740,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ },
                         .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ },
                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
-                        .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
@@ -5747,7 +5792,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ },
                         .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ },
                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
-                        .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
@@ -5799,7 +5844,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .lzcnt, .tmp1q, .tmp1q, ._, ._ },
                         .{ ._, ._, .sub, .tmp1b, .sia(64, .src0, .sub_bit_size), ._, ._ },
                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
-                        .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
@@ -5857,7 +5902,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
                         .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
-                        .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
@@ -5915,7 +5960,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._c, .st, ._, ._, ._, ._ },
                         .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
                         .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
-                        .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
@@ -5970,7 +6015,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
                         .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
-                        .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
@@ -6028,7 +6073,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
                         .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
-                        .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
@@ -6086,7 +6131,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._c, .st, ._, ._, ._, ._ },
                         .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
                         .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
-                        .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
@@ -6141,7 +6186,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
                         .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
-                        .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
@@ -6199,7 +6244,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
                         .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
-                        .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
@@ -6257,7 +6302,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._c, .st, ._, ._, ._, ._ },
                         .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
                         .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
-                        .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
@@ -6312,7 +6357,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
                         .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
-                        .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
@@ -6370,7 +6415,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ },
                         .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ },
                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ },
-                        .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
@@ -6428,7 +6473,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._c, .st, ._, ._, ._, ._ },
                         .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ },
                         .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ },
-                        .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
@@ -6484,7 +6529,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ },
                         .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ },
                         .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ },
-                        .{ ._, ._, .inc, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
@@ -10094,7 +10139,7 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void {
                 data_off += @intCast(tag_name_len + 1);
             }
 
-            try self.asmOpOnly(.{ ._, .ud2 });
+            try self.asmOpOnly(.{ ._2, .ud });
 
             for (epilogue_relocs) |reloc| self.performReloc(reloc);
             try self.asmOpOnly(.{ ._, .ret });
@@ -10373,7 +10418,7 @@ fn regClassForType(self: *CodeGen, ty: Type) Register.Class {
 fn regSetForRegClass(rc: Register.Class) RegisterManager.RegisterBitSet {
     return switch (rc) {
         .general_purpose => abi.RegisterClass.gp,
-        .segment, .ip => unreachable,
+        .segment, .ip, .cr, .dr => unreachable,
         .x87 => abi.RegisterClass.x87,
         .mmx => @panic("TODO"),
         .sse => abi.RegisterClass.sse,
@@ -12195,8 +12240,8 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
                     try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1));
                     try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[3].to32(), .u(1));
                 } else {
-                    try self.asmRegister(.{ ._, .inc }, temp_regs[2].to32());
-                    try self.asmRegister(.{ ._, .inc }, temp_regs[3].to32());
+                    try self.asmRegister(.{ ._c, .in }, temp_regs[2].to32());
+                    try self.asmRegister(.{ ._c, .in }, temp_regs[3].to32());
                 }
                 try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[3].to32(), .u(limb_len));
                 _ = try self.asmJccReloc(.b, inner_loop);
@@ -12209,7 +12254,7 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
                 if (slow_inc) {
                     try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1));
                 } else {
-                    try self.asmRegister(.{ ._, .inc }, temp_regs[2].to32());
+                    try self.asmRegister(.{ ._c, .in }, temp_regs[2].to32());
                 }
                 try self.asmMemoryImmediate(.{ ._, .cmp }, .{
                     .base = .{ .frame = lhs_mcv.load_frame.index },
@@ -12236,7 +12281,7 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void {
                 if (slow_inc) {
                     try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1));
                 } else {
-                    try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32());
+                    try self.asmRegister(.{ ._c, .in }, temp_regs[0].to32());
                 }
                 try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[0].to32(), .u(limb_len));
                 _ = try self.asmJccReloc(.b, outer_loop);
@@ -13938,7 +13983,7 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void {
             if (self.hasFeature(.slow_incdec)) {
                 try self.asmRegisterImmediate(.{ ._, .sub }, index_reg.to32(), .u(1));
             } else {
-                try self.asmRegister(.{ ._, .dec }, index_reg.to32());
+                try self.asmRegister(.{ ._c, .de }, index_reg.to32());
             }
             try self.asmMemoryImmediate(.{ ._, .cmp }, .{
                 .base = .{ .frame = src_frame_addr.index },
@@ -14133,7 +14178,7 @@ fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void {
             if (self.hasFeature(.slow_incdec)) {
                 try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1));
             } else {
-                try self.asmRegister(.{ ._, .inc }, index_reg.to32());
+                try self.asmRegister(.{ ._c, .in }, index_reg.to32());
             }
             try self.asmRegisterImmediate(.{ ._, .cmp }, index_reg.to32(), .u(limbs_len));
             const zero = try self.asmJccReloc(.nb, undefined);
@@ -14535,8 +14580,8 @@ fn genByteSwap(
                 try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1));
                 try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1));
             } else {
-                try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32());
-                try self.asmRegister(.{ ._, .dec }, temp_regs[1].to32());
+                try self.asmRegister(.{ ._c, .in }, temp_regs[0].to32());
+                try self.asmRegister(.{ ._c, .de }, temp_regs[1].to32());
             }
             try self.asmRegisterRegister(.{ ._, .cmp }, temp_regs[0].to32(), temp_regs[1].to32());
             _ = try self.asmJccReloc(.be, loop);
@@ -15113,7 +15158,7 @@ fn airAbs(self: *CodeGen, inst: Air.Inst.Index) !void {
                     if (self.hasFeature(.slow_incdec)) {
                         try self.asmRegisterImmediate(.{ ._, .add }, tmp_regs[0].to32(), .u(1));
                     } else {
-                        try self.asmRegister(.{ ._, .inc }, tmp_regs[0].to32());
+                        try self.asmRegister(.{ ._c, .in }, tmp_regs[0].to32());
                     }
                     try self.asmRegisterImmediate(.{ ._, .cmp }, tmp_regs[0].to32(), .u(limb_len));
                     _ = try self.asmJccReloc(.b, neg_loop);
@@ -16452,8 +16497,8 @@ fn genShiftBinOpMir(
                         try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1));
                         try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[0].to32(), .u(1));
                     } else {
-                        try self.asmRegister(.{ ._, .dec }, temp_regs[1].to32());
-                        try self.asmRegister(.{ ._, .dec }, temp_regs[0].to32());
+                        try self.asmRegister(.{ ._c, .de }, temp_regs[1].to32());
+                        try self.asmRegister(.{ ._c, .de }, temp_regs[0].to32());
                     }
                     _ = try self.asmJccReloc(.nz, loop);
                 },
@@ -16462,8 +16507,8 @@ fn genShiftBinOpMir(
                         try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[1].to32(), .u(1));
                         try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1));
                     } else {
-                        try self.asmRegister(.{ ._, .inc }, temp_regs[1].to32());
-                        try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32());
+                        try self.asmRegister(.{ ._c, .in }, temp_regs[1].to32());
+                        try self.asmRegister(.{ ._c, .in }, temp_regs[0].to32());
                     }
                     try self.asmRegisterImmediate(
                         .{ ._, .cmp },
@@ -16532,12 +16577,12 @@ fn genShiftBinOpMir(
                 ._l => if (slow_inc_dec) {
                     try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1));
                 } else {
-                    try self.asmRegister(.{ ._, .dec }, temp_regs[1].to32());
+                    try self.asmRegister(.{ ._c, .de }, temp_regs[1].to32());
                 },
                 ._r => if (slow_inc_dec) {
                     try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[1].to32(), .u(1));
                 } else {
-                    try self.asmRegister(.{ ._, .inc }, temp_regs[1].to32());
+                    try self.asmRegister(.{ ._c, .in }, temp_regs[1].to32());
                 },
                 else => unreachable,
             }
@@ -17163,8 +17208,8 @@ fn genMulDivBinOp(
                     try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1));
                     try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[3].to32(), .u(1));
                 } else {
-                    try self.asmRegister(.{ ._, .inc }, temp_regs[2].to32());
-                    try self.asmRegister(.{ ._, .inc }, temp_regs[3].to32());
+                    try self.asmRegister(.{ ._c, .in }, temp_regs[2].to32());
+                    try self.asmRegister(.{ ._c, .in }, temp_regs[3].to32());
                 }
                 try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[3].to32(), .u(limb_len));
                 _ = try self.asmJccReloc(.b, inner_loop);
@@ -17173,7 +17218,7 @@ fn genMulDivBinOp(
                 if (slow_inc) {
                     try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1));
                 } else {
-                    try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32());
+                    try self.asmRegister(.{ ._c, .in }, temp_regs[0].to32());
                 }
                 try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[0].to32(), .u(limb_len));
                 _ = try self.asmJccReloc(.b, outer_loop);
@@ -19765,7 +19810,7 @@ fn airArg(self: *CodeGen, inst: Air.Inst.Index) !void {
                 if (self.hasFeature(.slow_incdec)) {
                     try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1));
                 } else {
-                    try self.asmRegister(.{ ._, .inc }, index_reg.to32());
+                    try self.asmRegister(.{ ._c, .in }, index_reg.to32());
                 }
                 try self.asmRegisterImmediate(
                     .{ ._, .cmp },
@@ -20042,7 +20087,7 @@ fn genCall(self: *CodeGen, info: union(enum) {
                 if (self.hasFeature(.slow_incdec)) {
                     try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1));
                 } else {
-                    try self.asmRegister(.{ ._, .inc }, index_reg.to32());
+                    try self.asmRegister(.{ ._c, .in }, index_reg.to32());
                 }
                 try self.asmRegisterImmediate(
                     .{ ._, .cmp },
@@ -21423,7 +21468,7 @@ fn lowerSwitchBr(
             defer if (condition_index_lock) |lock| self.register_manager.unlockReg(lock);
             try self.truncateRegister(condition_ty, condition_index_reg);
             const ptr_size = @divExact(self.target.ptrBitWidth(), 8);
-            try self.asmMemory(.{ ._, .jmp }, .{
+            try self.asmMemory(.{ ._mp, .j }, .{
                 .base = .table,
                 .mod = .{ .rm = .{
                     .size = .ptr,
@@ -21720,7 +21765,7 @@ fn airSwitchDispatch(self: *CodeGen, inst: Air.Inst.Index) !void {
             defer if (condition_index_lock) |lock| self.register_manager.unlockReg(lock);
             try self.truncateRegister(condition_ty, condition_index_reg);
             const ptr_size = @divExact(self.target.ptrBitWidth(), 8);
-            try self.asmMemory(.{ ._, .jmp }, .{
+            try self.asmMemory(.{ ._mp, .j }, .{
                 .base = .table,
                 .mod = .{ .rm = .{
                     .size = .ptr,
@@ -21777,7 +21822,7 @@ fn airSwitchDispatch(self: *CodeGen, inst: Air.Inst.Index) !void {
 fn performReloc(self: *CodeGen, reloc: Mir.Inst.Index) void {
     const next_inst: u32 = @intCast(self.mir_instructions.len);
     switch (self.mir_instructions.items(.tag)[reloc]) {
-        .j, .jmp => {},
+        .j => {},
         .pseudo => switch (self.mir_instructions.items(.ops)[reloc]) {
             .pseudo_j_z_and_np_inst, .pseudo_j_nz_or_p_inst => {},
             else => unreachable,
@@ -22149,65 +22194,52 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void {
             prefix = .directive;
         }
 
-        var mnem_size: ?Memory.Size = if (prefix == .directive)
-            null
-        else if (std.mem.endsWith(u8, mnem_str, "b"))
-            .byte
-        else if (std.mem.endsWith(u8, mnem_str, "w"))
-            .word
-        else if (std.mem.endsWith(u8, mnem_str, "l"))
-            .dword
-        else if (std.mem.endsWith(u8, mnem_str, "q") and
-            (std.mem.indexOfScalar(u8, "vp", mnem_str[0]) == null or !std.mem.endsWith(u8, mnem_str, "dq")))
-            .qword
-        else if (std.mem.endsWith(u8, mnem_str, "t"))
-            .tbyte
-        else
-            null;
-        const mnem_tag = while (true) break std.meta.stringToEnum(
+        var mnem_size: struct {
+            used: bool,
+            size: ?Memory.Size,
+            fn use(size: *@This()) ?Memory.Size {
+                size.used = true;
+                return size.size;
+            }
+        } = .{
+            .used = false,
+            .size = if (prefix == .directive)
+                null
+            else if (std.mem.endsWith(u8, mnem_str, "b"))
+                .byte
+            else if (std.mem.endsWith(u8, mnem_str, "w"))
+                .word
+            else if (std.mem.endsWith(u8, mnem_str, "l"))
+                .dword
+            else if (std.mem.endsWith(u8, mnem_str, "q") and
+                (std.mem.indexOfScalar(u8, "vp", mnem_str[0]) == null or !std.mem.endsWith(u8, mnem_str, "dq")))
+                .qword
+            else if (std.mem.endsWith(u8, mnem_str, "t"))
+                .tbyte
+            else
+                null,
+        };
+        var mnem_tag = while (true) break std.meta.stringToEnum(
             encoder.Instruction.Mnemonic,
-            mnem_str[0 .. mnem_str.len - @intFromBool(mnem_size != null)],
-        ) orelse if (mnem_size) |_| {
-            mnem_size = null;
+            mnem_str[0 .. mnem_str.len - @intFromBool(mnem_size.size != null)],
+        ) orelse if (mnem_size.size) |_| {
+            mnem_size.size = null;
             continue;
         } else return self.fail("invalid mnemonic: '{s}'", .{mnem_str});
         if (@as(?Memory.Size, switch (mnem_tag) {
             .clflush => .byte,
+            .fldcw, .fnstcw, .fstcw, .fnstsw, .fstsw => .word,
             .fldenv, .fnstenv, .fstenv => .none,
+            .frstor, .fsave, .fnsave, .fxrstor, .fxrstor64, .fxsave, .fxsave64 => .none,
+            .invlpg => .none,
+            .invpcid => .xword,
             .ldmxcsr, .stmxcsr, .vldmxcsr, .vstmxcsr => .dword,
             else => null,
         })) |fixed_mnem_size| {
-            if (mnem_size) |size| if (size != fixed_mnem_size)
+            if (mnem_size.size) |size| if (size != fixed_mnem_size)
                 return self.fail("invalid size: '{s}'", .{mnem_str});
-            mnem_size = fixed_mnem_size;
+            mnem_size.size = fixed_mnem_size;
         }
-        const mnem_name = @tagName(mnem_tag);
-        const mnem_fixed_tag: Mir.Inst.FixedTag = if (prefix == .directive)
-            .{ ._, .pseudo }
-        else for (std.enums.values(Mir.Inst.Fixes)) |fixes| {
-            const fixes_name = @tagName(fixes);
-            const space_i = std.mem.indexOfScalar(u8, fixes_name, ' ');
-            const fixes_prefix = if (space_i) |i|
-                std.meta.stringToEnum(encoder.Instruction.Prefix, fixes_name[0..i]).?
-            else
-                .none;
-            if (fixes_prefix != prefix) continue;
-            const pattern = fixes_name[if (space_i) |i| i + " ".len else 0..];
-            const wildcard_i = std.mem.indexOfScalar(u8, pattern, '_').?;
-            const mnem_prefix = pattern[0..wildcard_i];
-            const mnem_suffix = pattern[wildcard_i + "_".len ..];
-            if (!std.mem.startsWith(u8, mnem_name, mnem_prefix)) continue;
-            if (!std.mem.endsWith(u8, mnem_name, mnem_suffix)) continue;
-            break .{ fixes, std.meta.stringToEnum(
-                Mir.Inst.Tag,
-                mnem_name[mnem_prefix.len .. mnem_name.len - mnem_suffix.len],
-            ) orelse continue };
-        } else {
-            assert(prefix != .none); // no combination of fixes produced a known mnemonic
-            return self.fail("invalid prefix for mnemonic: '{s} {s}'", .{
-                @tagName(prefix), mnem_name,
-            });
-        };
 
         var ops: [4]Operand = @splat(.none);
         var ops_len: usize = 0;
@@ -22236,12 +22268,13 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void {
                     op.* = .{ .mem = .{
                         .base = .{ .reg = reg },
                         .mod = .{ .rm = .{
-                            .size = mnem_size orelse return self.fail("unknown size: '{s}'", .{op_str}),
+                            .size = mnem_size.use() orelse
+                                return self.fail("unknown size: '{s}'", .{op_str}),
                             .disp = disp,
                         } },
                     } };
                 } else {
-                    if (mnem_size) |size| if (reg.bitSize() != size.bitSize(self.target))
+                    if (mnem_size.use()) |size| if (reg.bitSize() != size.bitSize(self.target))
                         return self.fail("invalid register size: '{s}'", .{op_str});
                     op.* = .{ .reg = reg };
                 }
@@ -22260,14 +22293,17 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void {
                     else
                         return self.fail("invalid modifier: '{s}'", .{modifier}),
                     .register => |reg| if (std.mem.eql(u8, modifier, ""))
-                        .{ .reg = reg }
+                        .{ .reg = if (mnem_size.use()) |size|
+                            registerAlias(reg, @intCast(@divExact(size.bitSize(self.target), 8)))
+                        else
+                            reg }
                     else
                         return self.fail("invalid modifier: '{s}'", .{modifier}),
                     .memory => |addr| if (std.mem.eql(u8, modifier, "") or std.mem.eql(u8, modifier, "P"))
                         .{ .mem = .{
                             .base = .{ .reg = .ds },
                             .mod = .{ .rm = .{
-                                .size = mnem_size orelse
+                                .size = mnem_size.use() orelse
                                     return self.fail("unknown size: '{s}'", .{op_str}),
                                 .disp = @intCast(@as(i64, @bitCast(addr))),
                             } },
@@ -22278,7 +22314,7 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void {
                         .{ .mem = .{
                             .base = .{ .reg = reg_off.reg },
                             .mod = .{ .rm = .{
-                                .size = mnem_size orelse
+                                .size = mnem_size.use() orelse
                                     return self.fail("unknown size: '{s}'", .{op_str}),
                                 .disp = reg_off.off,
                             } },
@@ -22289,7 +22325,7 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void {
                         .{ .mem = .{
                             .base = .{ .frame = frame_addr.index },
                             .mod = .{ .rm = .{
-                                .size = mnem_size orelse
+                                .size = mnem_size.use() orelse
                                     return self.fail("unknown size: '{s}'", .{op_str}),
                                 .disp = frame_addr.off,
                             } },
@@ -22307,21 +22343,12 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void {
                     else => return self.fail("invalid constraint: '{s}'", .{op_str}),
                 };
             } else if (std.mem.startsWith(u8, op_str, "$")) {
-                if (std.fmt.parseInt(i32, op_str["$".len..], 0)) |s| {
-                    if (mnem_size) |size| {
-                        const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - (size.bitSize(self.target) - 1));
-                        if ((if (s < 0) ~s else s) > max)
-                            return self.fail("invalid immediate size: '{s}'", .{op_str});
-                    }
-                    op.* = .{ .imm = .s(s) };
-                } else |_| if (std.fmt.parseInt(u64, op_str["$".len..], 0)) |u| {
-                    if (mnem_size) |size| {
-                        const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - size.bitSize(self.target));
-                        if (u > max)
-                            return self.fail("invalid immediate size: '{s}'", .{op_str});
-                    }
-                    op.* = .{ .imm = .u(u) };
-                } else |_| return self.fail("invalid immediate: '{s}'", .{op_str});
+                op.* = if (std.fmt.parseInt(u64, op_str["$".len..], 0)) |u|
+                    .{ .imm = .u(u) }
+                else |_| if (std.fmt.parseInt(i32, op_str["$".len..], 0)) |s|
+                    .{ .imm = .s(s) }
+                else |_|
+                    return self.fail("invalid immediate: '{s}'", .{op_str});
             } else if (std.mem.endsWith(u8, op_str, ")")) {
                 const open = std.mem.indexOfScalar(u8, op_str, '(') orelse
                     return self.fail("invalid operand: '{s}'", .{op_str});
@@ -22348,49 +22375,47 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void {
                 else
                     .@"1";
                 if (sib_it.next()) |_| return self.fail("invalid memory operand: '{s}'", .{op_str});
-                op.* = .{
-                    .mem = .{
-                        .base = if (base_str.len > 0)
-                            .{ .reg = parseRegName(base_str["%%".len..]) orelse
-                                return self.fail("invalid base register: '{s}'", .{base_str}) }
+                op.* = if (std.mem.eql(u8, base_str, "%%dx") and index_str.len == 0) .{ .reg = .dx } else .{ .mem = .{
+                    .base = if (base_str.len > 0)
+                        .{ .reg = parseRegName(base_str["%%".len..]) orelse
+                            return self.fail("invalid base register: '{s}'", .{base_str}) }
+                    else
+                        .none,
+                    .mod = .{ .rm = .{
+                        .size = mnem_size.use() orelse return self.fail("unknown size: '{s}'", .{op_str}),
+                        .index = if (index_str.len > 0)
+                            parseRegName(index_str["%%".len..]) orelse
+                                return self.fail("invalid index register: '{s}'", .{op_str})
                         else
                             .none,
-                        .mod = .{ .rm = .{
-                            .size = mnem_size orelse return self.fail("unknown size: '{s}'", .{op_str}),
-                            .index = if (index_str.len > 0)
-                                parseRegName(index_str["%%".len..]) orelse
-                                    return self.fail("invalid index register: '{s}'", .{op_str})
+                        .scale = scale,
+                        .disp = if (std.mem.startsWith(u8, op_str[0..open], "%[") and
+                            std.mem.endsWith(u8, op_str[0..open], "]"))
+                        disp: {
+                            const colon = std.mem.indexOfScalarPos(u8, op_str[0..open], "%[".len, ':');
+                            const modifier = if (colon) |colon_pos|
+                                op_str[colon_pos + ":".len .. open - "]".len]
                             else
-                                .none,
-                            .scale = scale,
-                            .disp = if (std.mem.startsWith(u8, op_str[0..open], "%[") and
-                                std.mem.endsWith(u8, op_str[0..open], "]"))
-                            disp: {
-                                const colon = std.mem.indexOfScalarPos(u8, op_str[0..open], "%[".len, ':');
-                                const modifier = if (colon) |colon_pos|
-                                    op_str[colon_pos + ":".len .. open - "]".len]
+                                "";
+                            break :disp switch (args.items[
+                                arg_map.get(op_str["%[".len .. colon orelse open - "]".len]) orelse
+                                    return self.fail("no matching constraint: '{s}'", .{op_str})
+                            ]) {
+                                .immediate => |imm| if (std.mem.eql(u8, modifier, "") or
+                                    std.mem.eql(u8, modifier, "c"))
+                                    std.math.cast(i32, @as(i64, @bitCast(imm))) orelse
+                                        return self.fail("invalid displacement: '{s}'", .{op_str})
                                 else
-                                    "";
-                                break :disp switch (args.items[
-                                    arg_map.get(op_str["%[".len .. colon orelse open - "]".len]) orelse
-                                        return self.fail("no matching constraint: '{s}'", .{op_str})
-                                ]) {
-                                    .immediate => |imm| if (std.mem.eql(u8, modifier, "") or
-                                        std.mem.eql(u8, modifier, "c"))
-                                        std.math.cast(i32, @as(i64, @bitCast(imm))) orelse
-                                            return self.fail("invalid displacement: '{s}'", .{op_str})
-                                    else
-                                        return self.fail("invalid modifier: '{s}'", .{modifier}),
-                                    else => return self.fail("invalid constraint: '{s}'", .{op_str}),
-                                };
-                            } else if (open > 0)
-                                std.fmt.parseInt(i32, op_str[0..open], 0) catch
-                                    return self.fail("invalid displacement: '{s}'", .{op_str})
-                            else
-                                0,
-                        } },
-                    },
-                };
+                                    return self.fail("invalid modifier: '{s}'", .{modifier}),
+                                else => return self.fail("invalid constraint: '{s}'", .{op_str}),
+                            };
+                        } else if (open > 0)
+                            std.fmt.parseInt(i32, op_str[0..open], 0) catch
+                                return self.fail("invalid displacement: '{s}'", .{op_str})
+                        else
+                            0,
+                    } },
+                } };
             } else if (Label.isValid(.reference, op_str)) {
                 const anon = std.ascii.isDigit(op_str[0]);
                 const label_gop = try labels.getOrPut(self.gpa, op_str[0..if (anon) 1 else op_str.len]);
@@ -22410,6 +22435,51 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void {
 
         // convert from att syntax to intel syntax
         std.mem.reverse(Operand, ops[0..ops_len]);
+        if (!mnem_size.used) if (mnem_size.size) |size| {
+            comptime var max_mnem_len: usize = 0;
+            inline for (@typeInfo(encoder.Instruction.Mnemonic).@"enum".fields) |mnem|
+                max_mnem_len = @max(mnem.name.len, max_mnem_len);
+            var intel_mnem_buf: [max_mnem_len + 1]u8 = undefined;
+            const intel_mnem_str = std.fmt.bufPrint(&intel_mnem_buf, "{s}{c}", .{
+                @tagName(mnem_tag),
+                @as(u8, switch (size) {
+                    .byte => 'b',
+                    .word => 'w',
+                    .dword => 'd',
+                    .qword => 'q',
+                    .tbyte => 't',
+                    else => unreachable,
+                }),
+            }) catch unreachable;
+            if (std.meta.stringToEnum(encoder.Instruction.Mnemonic, intel_mnem_str)) |intel_mnem_tag| mnem_tag = intel_mnem_tag;
+        };
+        const mnem_name = @tagName(mnem_tag);
+        const mnem_fixed_tag: Mir.Inst.FixedTag = if (prefix == .directive)
+            .{ ._, .pseudo }
+        else for (std.enums.values(Mir.Inst.Fixes)) |fixes| {
+            const fixes_name = @tagName(fixes);
+            const space_i = std.mem.indexOfScalar(u8, fixes_name, ' ');
+            const fixes_prefix = if (space_i) |i|
+                std.meta.stringToEnum(encoder.Instruction.Prefix, fixes_name[0..i]).?
+            else
+                .none;
+            if (fixes_prefix != prefix) continue;
+            const pattern = fixes_name[if (space_i) |i| i + " ".len else 0..];
+            const wildcard_i = std.mem.indexOfScalar(u8, pattern, '_').?;
+            const mnem_prefix = pattern[0..wildcard_i];
+            const mnem_suffix = pattern[wildcard_i + "_".len ..];
+            if (!std.mem.startsWith(u8, mnem_name, mnem_prefix)) continue;
+            if (!std.mem.endsWith(u8, mnem_name, mnem_suffix)) continue;
+            break .{ fixes, std.meta.stringToEnum(
+                Mir.Inst.Tag,
+                mnem_name[mnem_prefix.len .. mnem_name.len - mnem_suffix.len],
+            ) orelse continue };
+        } else {
+            assert(prefix != .none); // no combination of fixes produced a known mnemonic
+            return self.fail("invalid prefix for mnemonic: '{s} {s}'", .{
+                @tagName(prefix), mnem_name,
+            });
+        };
 
         (if (prefix == .directive) switch (mnem_tag) {
             .@".cfi_def_cfa" => if (ops[0] == .reg and ops[1] == .imm and ops[2] == .none)
@@ -22815,7 +22885,7 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool)
                 else => {},
             },
         },
-        .ip => {},
+        .ip, .cr, .dr => {},
     }
     return self.fail("TODO moveStrategy for {}", .{ty.fmt(pt)});
 }
@@ -22900,13 +22970,13 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C
                     for (dst_regs, &hazard_regs, 1..) |dst_reg, src_reg, hazard_index| {
                         const dst_id = dst_reg.id();
                         if (dst_id == src_reg.id()) continue;
-                        var mir_tag: Mir.Inst.Tag = .mov;
+                        var mir_tag: Mir.Inst.FixedTag = .{ ._, .mov };
                         for (hazard_regs[hazard_index..]) |*hazard_reg| {
                             if (dst_id != hazard_reg.id()) continue;
-                            mir_tag = .xchg;
+                            mir_tag = .{ ._g, .xch };
                             hazard_reg.* = src_reg;
                         }
-                        try self.asmRegisterRegister(.{ ._, mir_tag }, dst_reg.to64(), src_reg.to64());
+                        try self.asmRegisterRegister(mir_tag, dst_reg.to64(), src_reg.to64());
                     }
                     return;
                 },
@@ -23025,7 +23095,7 @@ fn genSetReg(
                 else => unreachable,
             },
             .segment, .x87, .mmx, .sse => try self.genSetReg(dst_reg, ty, try self.genTypedValue(try pt.undefValue(ty)), opts),
-            .ip => unreachable,
+            .ip, .cr, .dr => unreachable,
         },
         .eflags => |cc| try self.asmSetccRegister(cc, dst_reg.to8()),
         .immediate => |imm| {
@@ -23063,7 +23133,7 @@ fn genSetReg(
                     registerAlias(dst_reg, abi_size),
                     src_reg,
                 ),
-                .x87, .mmx, .ip => unreachable,
+                .x87, .mmx, .ip, .cr, .dr => unreachable,
                 .sse => if (self.hasFeature(.sse2)) try self.asmRegisterRegister(
                     switch (abi_size) {
                         1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
@@ -23092,7 +23162,7 @@ fn genSetReg(
                 dst_reg,
                 switch (src_reg.class()) {
                     .general_purpose, .segment => registerAlias(src_reg, abi_size),
-                    .x87, .mmx, .ip => unreachable,
+                    .x87, .mmx, .ip, .cr, .dr => unreachable,
                     .sse => try self.copyToTmpRegister(ty, src_mcv),
                 },
             ),
@@ -23107,7 +23177,7 @@ fn genSetReg(
                     },
                     else => unreachable,
                 },
-                .mmx, .sse, .ip => unreachable,
+                .mmx, .sse, .ip, .cr, .dr => unreachable,
             },
             .mmx => unreachable,
             .sse => switch (src_reg.class()) {
@@ -23126,7 +23196,7 @@ fn genSetReg(
                     .{ .register = try self.copyToTmpRegister(ty, src_mcv) },
                     opts,
                 ),
-                .x87, .mmx, .ip => unreachable,
+                .x87, .mmx, .ip, .cr, .dr => unreachable,
                 .sse => try self.asmRegisterRegister(
                     @as(?Mir.Inst.FixedTag, switch (ty.scalarType(zcu).zigTypeTag(zcu)) {
                         else => switch (abi_size) {
@@ -23153,7 +23223,7 @@ fn genSetReg(
                     registerAlias(src_reg, abi_size),
                 ),
             },
-            .ip => unreachable,
+            .ip, .cr, .dr => unreachable,
         },
         inline .register_pair,
         .register_triple,
@@ -23294,7 +23364,7 @@ fn genSetReg(
                         });
                         return;
                     },
-                    .segment, .mmx, .ip => unreachable,
+                    .segment, .mmx, .ip, .cr, .dr => unreachable,
                     .x87, .sse => {},
                 },
                 .load_direct => |sym_index| switch (dst_reg.class()) {
@@ -23309,7 +23379,7 @@ fn genSetReg(
                         });
                         return;
                     },
-                    .segment, .mmx, .ip => unreachable,
+                    .segment, .mmx, .ip, .cr, .dr => unreachable,
                     .x87, .sse => {},
                 },
                 .load_got, .load_tlv => {},
@@ -23456,7 +23526,7 @@ fn genSetMem(
             };
             const src_alias = registerAlias(src_reg, abi_size);
             const src_size: u32 = @intCast(switch (src_alias.class()) {
-                .general_purpose, .segment, .x87, .ip => @divExact(src_alias.bitSize(), 8),
+                .general_purpose, .segment, .x87, .ip, .cr, .dr => @divExact(src_alias.bitSize(), 8),
                 .mmx, .sse => abi_size,
             });
             const src_align: InternPool.Alignment = .fromNonzeroByteUnits(
@@ -24240,18 +24310,18 @@ fn atomicOp(
     };
     switch (strat) {
         .lock => {
-            const tag: Mir.Inst.Tag = if (rmw_op) |op| switch (op) {
-                .Xchg => if (unused) .mov else .xchg,
-                .Add => if (unused) .add else .xadd,
-                .Sub => if (unused) .sub else .xadd,
-                .And => .@"and",
-                .Or => .@"or",
-                .Xor => .xor,
+            const mir_tag: Mir.Inst.FixedTag = if (rmw_op) |op| switch (op) {
+                .Xchg => if (unused) .{ ._, .mov } else .{ ._g, .xch },
+                .Add => .{ .@"lock _", if (unused) .add else .xadd },
+                .Sub => .{ .@"lock _", if (unused) .sub else .xadd },
+                .And => .{ .@"lock _", .@"and" },
+                .Or => .{ .@"lock _", .@"or" },
+                .Xor => .{ .@"lock _", .xor },
                 else => unreachable,
             } else switch (order) {
-                .unordered, .monotonic, .release, .acq_rel => .mov,
+                .unordered, .monotonic, .release, .acq_rel => .{ ._, .mov },
                 .acquire => unreachable,
-                .seq_cst => .xchg,
+                .seq_cst => .{ ._g, .xch },
             };
 
             const dst_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
@@ -24260,18 +24330,10 @@ fn atomicOp(
             defer self.register_manager.unlockReg(dst_lock);
 
             try self.genSetReg(dst_reg, val_ty, val_mcv, .{});
-            if (rmw_op == std.builtin.AtomicRmwOp.Sub and tag == .xadd) {
+            if (rmw_op == std.builtin.AtomicRmwOp.Sub and mir_tag[1] == .xadd) {
                 try self.genUnOpMir(.{ ._, .neg }, val_ty, dst_mcv);
             }
-            try self.asmMemoryRegister(
-                switch (tag) {
-                    .mov, .xchg => .{ ._, tag },
-                    .xadd, .add, .sub, .@"and", .@"or", .xor => .{ .@"lock _", tag },
-                    else => unreachable,
-                },
-                ptr_mem,
-                registerAlias(dst_reg, val_abi_size),
-            );
+            try self.asmMemoryRegister(mir_tag, ptr_mem, registerAlias(dst_reg, val_abi_size));
 
             return if (unused) .unreach else dst_mcv;
         },
@@ -27599,7 +27661,7 @@ fn resolveCallingConventionValues(
                         break :return_value .init(.{ .register = registerAlias(ret_gpr[0], ret_size) })
                     else if (ret_gpr.len >= 2 and ret_ty.isSliceAtRuntime(zcu))
                         break :return_value .init(.{ .register_pair = ret_gpr[0..2].* }),
-                    .segment, .mmx, .ip => unreachable,
+                    .segment, .mmx, .ip, .cr, .dr => unreachable,
                     .x87 => break :return_value .init(.{ .register = .st0 }),
                     .sse => if (ret_size <= self.vectorSize(.float)) break :return_value .init(.{
                         .register = registerAlias(abi.getCAbiSseReturnRegs(cc)[0], @max(ret_size, 16)),
@@ -27634,7 +27696,7 @@ fn resolveCallingConventionValues(
                         param_gpr = param_gpr[2..];
                         continue;
                     },
-                    .segment, .mmx, .ip => unreachable,
+                    .segment, .mmx, .ip, .cr, .dr => unreachable,
                     .x87 => if (param_x87.len >= 1) {
                         arg.* = .{ .register = param_x87[0] };
                         param_x87 = param_x87[1..];
@@ -27686,9 +27748,9 @@ fn failMsg(self: *CodeGen, msg: *Zcu.ErrorMsg) error{ OutOfMemory, CodegenFail }
 }
 
 fn parseRegName(name: []const u8) ?Register {
-    if (@hasDecl(Register, "parseRegName")) {
-        return Register.parseRegName(name);
-    }
+    if (std.mem.startsWith(u8, name, "db")) return @enumFromInt(
+        @intFromEnum(Register.dr0) + (std.fmt.parseInt(u4, name["db".len..], 0) catch return null),
+    );
     return std.meta.stringToEnum(Register, name);
 }
 
@@ -27733,6 +27795,14 @@ fn registerAlias(reg: Register, size_bytes: u32) Register {
             .rip
         else
             unreachable,
+        .cr => if (size_bytes <= 8)
+            reg
+        else
+            unreachable,
+        .dr => if (size_bytes <= 8)
+            reg
+        else
+            unreachable,
     };
 }
 
src/arch/x86_64/Disassembler.zig
@@ -80,6 +80,21 @@ pub fn next(dis: *Disassembler) Error!?Instruction {
                 .op2 = .{ .imm = imm },
             });
         },
+        .ii => {
+            const imm1 = try dis.parseImm(enc.data.ops[0]);
+            const imm2 = try dis.parseImm(enc.data.ops[1]);
+            return inst(enc, .{
+                .op1 = .{ .imm = imm1 },
+                .op2 = .{ .imm = imm2 },
+            });
+        },
+        .ia => {
+            const imm = try dis.parseImm(enc.data.ops[0]);
+            return inst(enc, .{
+                .op1 = .{ .imm = imm },
+                .op2 = .{ .reg = .eax },
+            });
+        },
         .m, .mi, .m1, .mc => {
             const modrm = try dis.parseModRmByte();
             const act_enc = Encoding.findByOpcode(enc.opcode(), .{
@@ -241,7 +256,7 @@ pub fn next(dis: *Disassembler) Error!?Instruction {
                 .op3 = op3,
             });
         },
-        .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => unreachable, // TODO
+        .rm0, .vm, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => unreachable, // TODO
     }
 }
 
src/arch/x86_64/encoder.zig
@@ -389,6 +389,7 @@ pub const Instruction = struct {
         const enc = inst.encoding;
         const data = enc.data;
 
+        try inst.encodeWait(encoder);
         if (data.mode.isVex()) {
             try inst.encodeVexPrefix(encoder);
             const opc = inst.encoding.opcode();
@@ -404,19 +405,24 @@ pub const Instruction = struct {
             .z, .o, .zo, .oz => {},
             .i, .d => try encodeImm(inst.ops[0].imm, data.ops[0], encoder),
             .zi, .oi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder),
+            .ii => {
+                try encodeImm(inst.ops[0].imm, data.ops[0], encoder);
+                try encodeImm(inst.ops[1].imm, data.ops[1], encoder);
+            },
             .fd => try encoder.imm64(inst.ops[1].mem.moffs.offset),
             .td => try encoder.imm64(inst.ops[0].mem.moffs.offset),
             else => {
-                const mem_op = switch (data.op_en) {
+                const mem_op: Operand = switch (data.op_en) {
+                    .ia => .{ .reg = .eax },
                     .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0],
-                    .rm, .rmi, .rm0, .vmi, .rmv => inst.ops[1],
+                    .rm, .rmi, .rm0, .vm, .vmi, .rmv => inst.ops[1],
                     .rvm, .rvmr, .rvmi => inst.ops[2],
                     else => unreachable,
                 };
                 switch (mem_op) {
                     .reg => |reg| {
                         const rm = switch (data.op_en) {
-                            .m, .mi, .m1, .mc, .vmi => enc.modRmExt(),
+                            .ia, .m, .mi, .m1, .mc, .vm, .vmi => enc.modRmExt(),
                             .mr, .mri, .mrc => inst.ops[1].reg.lowEnc(),
                             .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0].reg.lowEnc(),
                             .mvr => inst.ops[2].reg.lowEnc(),
@@ -426,7 +432,7 @@ pub const Instruction = struct {
                     },
                     .mem => |mem| {
                         const op = switch (data.op_en) {
-                            .m, .mi, .m1, .mc, .vmi => .none,
+                            .m, .mi, .m1, .mc, .vm, .vmi => .none,
                             .mr, .mri, .mrc => inst.ops[1],
                             .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0],
                             .mvr => inst.ops[2],
@@ -438,6 +444,7 @@ pub const Instruction = struct {
                 }
 
                 switch (data.op_en) {
+                    .ia => try encodeImm(inst.ops[0].imm, data.ops[0], encoder),
                     .mi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder),
                     .rmi, .mri, .vmi => try encodeImm(inst.ops[2].imm, data.ops[2], encoder),
                     .rvmr => try encoder.imm8(@as(u8, inst.ops[3].reg.enc()) << 4),
@@ -460,6 +467,13 @@ pub const Instruction = struct {
         }
     }
 
+    fn encodeWait(inst: Instruction, encoder: anytype) !void {
+        switch (inst.encoding.data.mode) {
+            .wait => try encoder.opcode_1byte(0x9b),
+            else => {},
+        }
+    }
+
     fn encodeLegacyPrefixes(inst: Instruction, encoder: anytype) !void {
         const enc = inst.encoding;
         const data = enc.data;
@@ -481,7 +495,7 @@ pub const Instruction = struct {
         }
 
         const segment_override: ?Register = switch (op_en) {
-            .z, .i, .zi, .o, .zo, .oz, .oi, .d => null,
+            .z, .i, .zi, .ii, .ia, .o, .zo, .oz, .oi, .d => null,
             .fd => inst.ops[1].mem.base().reg,
             .td => inst.ops[0].mem.base().reg,
             .rm, .rmi, .rm0 => if (inst.ops[1].isSegmentRegister())
@@ -500,7 +514,7 @@ pub const Instruction = struct {
                 }
             else
                 null,
-            .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => unreachable,
+            .vm, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => unreachable,
         };
         if (segment_override) |seg| {
             legacy.setSegmentOverride(seg);
@@ -517,7 +531,7 @@ pub const Instruction = struct {
         rex.w = inst.encoding.data.mode == .long;
 
         switch (op_en) {
-            .z, .i, .zi, .fd, .td, .d => {},
+            .z, .i, .zi, .ii, .ia, .fd, .td, .d => {},
             .o, .oz, .oi => rex.b = inst.ops[0].reg.isExtended(),
             .zo => rex.b = inst.ops[1].reg.isExtended(),
             .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .rmv => {
@@ -536,7 +550,7 @@ pub const Instruction = struct {
                 rex.b = b_x_op.isBaseExtended();
                 rex.x = b_x_op.isIndexExtended();
             },
-            .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable,
+            .vm, .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable,
         }
 
         try encoder.rex(rex);
@@ -552,21 +566,19 @@ pub const Instruction = struct {
         vex.w = inst.encoding.data.mode.isLong();
 
         switch (op_en) {
-            .z, .i, .zi, .fd, .td, .d => {},
-            .o, .oz, .oi => vex.b = inst.ops[0].reg.isExtended(),
-            .zo => vex.b = inst.ops[1].reg.isExtended(),
-            .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => {
+            .z, .i, .zi, .ii, .ia, .fd, .td, .d, .o, .oz, .oi, .zo => unreachable,
+            .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vm, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => {
                 const r_op = switch (op_en) {
                     .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0],
                     .mr, .mri, .mrc => inst.ops[1],
                     .mvr => inst.ops[2],
-                    .m, .mi, .m1, .mc, .vmi => .none,
+                    .m, .mi, .m1, .mc, .vm, .vmi => .none,
                     else => unreachable,
                 };
                 vex.r = r_op.isBaseExtended();
 
                 const b_x_op = switch (op_en) {
-                    .rm, .rmi, .rm0, .vmi, .rmv => inst.ops[1],
+                    .rm, .rmi, .rm0, .vm, .vmi, .rmv => inst.ops[1],
                     .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0],
                     .rvm, .rvmr, .rvmi => inst.ops[2],
                     else => unreachable,
@@ -595,7 +607,7 @@ pub const Instruction = struct {
 
         switch (op_en) {
             else => {},
-            .vmi => vex.v = inst.ops[0].reg,
+            .vm, .vmi => vex.v = inst.ops[0].reg,
             .rvm, .rvmr, .rvmi => vex.v = inst.ops[1].reg,
             .rmv => vex.v = inst.ops[2].reg,
         }
src/arch/x86_64/Encoding.zig
@@ -60,6 +60,32 @@ pub fn findByMnemonic(
     next: for (mnemonic_to_encodings_map[@intFromEnum(mnemonic)]) |data| {
         if (!switch (data.feature) {
             .none => true,
+            .@"32bit" => switch (target.cpu.arch) {
+                else => unreachable,
+                .x86 => true,
+                .x86_64 => false,
+            },
+            .@"64bit" => switch (target.cpu.arch) {
+                else => unreachable,
+                .x86 => false,
+                .x86_64 => true,
+            },
+            inline .@"invpcid 32bit", .@"rdpid 32bit" => |tag| switch (target.cpu.arch) {
+                else => unreachable,
+                .x86 => std.Target.x86.featureSetHas(
+                    target.cpu.features,
+                    @field(std.Target.x86.Feature, @tagName(tag)[0 .. @tagName(tag).len - " 32bit".len]),
+                ),
+                .x86_64 => false,
+            },
+            inline .@"invpcid 64bit", .@"rdpid 64bit" => |tag| switch (target.cpu.arch) {
+                else => unreachable,
+                .x86 => false,
+                .x86_64 => std.Target.x86.featureSetHas(
+                    target.cpu.features,
+                    @field(std.Target.x86.Feature, @tagName(tag)[0 .. @tagName(tag).len - " 64bit".len]),
+                ),
+            },
             inline else => |tag| has_features: {
                 comptime var feature_it = std.mem.splitScalar(u8, @tagName(tag), ' ');
                 comptime var features: []const std.Target.x86.Feature = &.{};
@@ -126,7 +152,7 @@ pub fn mandatoryPrefix(encoding: *const Encoding) ?u8 {
 
 pub fn modRmExt(encoding: Encoding) u3 {
     return switch (encoding.data.op_en) {
-        .m, .mi, .m1, .mc, .vmi => encoding.data.modrm_ext,
+        .ia, .m, .mi, .m1, .mc, .vm, .vmi => encoding.data.modrm_ext,
         else => unreachable,
     };
 }
@@ -176,7 +202,7 @@ pub fn format(
     for (opc) |byte| try writer.print("{x:0>2} ", .{byte});
 
     switch (encoding.data.op_en) {
-        .z, .fd, .td, .i, .zi, .d => {},
+        .z, .fd, .td, .i, .zi, .ii, .d => {},
         .o, .zo, .oz, .oi => {
             const op = switch (encoding.data.op_en) {
                 .o, .oz, .oi => encoding.data.ops[0],
@@ -192,17 +218,24 @@ pub fn format(
             };
             try writer.print("+{s} ", .{tag});
         },
-        .m, .mi, .m1, .mc, .vmi => try writer.print("/{d} ", .{encoding.modRmExt()}),
+        .ia, .m, .mi, .m1, .mc, .vm, .vmi => try writer.print("/{d} ", .{encoding.modRmExt()}),
         .mr, .rm, .rmi, .mri, .mrc, .rm0, .rvm, .rvmr, .rvmi, .mvr, .rmv => try writer.writeAll("/r "),
     }
 
     switch (encoding.data.op_en) {
-        .i, .d, .zi, .oi, .mi, .rmi, .mri, .vmi, .rvmi => {
-            const op = switch (encoding.data.op_en) {
-                .i, .d => encoding.data.ops[0],
-                .zi, .oi, .mi => encoding.data.ops[1],
-                .rmi, .mri, .vmi => encoding.data.ops[2],
-                .rvmi => encoding.data.ops[3],
+        .i, .d, .zi, .ii, .ia, .oi, .mi, .rmi, .mri, .vmi, .rvmi => for (0..2) |i| {
+            const op = switch (i) {
+                0 => switch (encoding.data.op_en) {
+                    .i, .ii, .ia, .d => encoding.data.ops[0],
+                    .zi, .oi, .mi => encoding.data.ops[1],
+                    .rmi, .mri, .vmi => encoding.data.ops[2],
+                    .rvmi => encoding.data.ops[3],
+                    else => unreachable,
+                },
+                1 => switch (encoding.data.op_en) {
+                    .ii => encoding.data.ops[1],
+                    else => break,
+                },
                 else => unreachable,
             };
             const tag = switch (op) {
@@ -218,13 +251,13 @@ pub fn format(
             try writer.print("{s} ", .{tag});
         },
         .rvmr => try writer.writeAll("/is4 "),
-        .z, .fd, .td, .o, .zo, .oz, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .rvm, .mvr, .rmv => {},
+        .z, .fd, .td, .o, .zo, .oz, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .vm, .rvm, .mvr, .rmv => {},
     }
 
     try writer.print("{s} ", .{@tagName(encoding.mnemonic)});
 
     for (encoding.data.ops) |op| switch (op) {
-        .none, .o16, .o32, .o64 => break,
+        .none => break,
         else => try writer.print("{s} ", .{@tagName(op)}),
     };
 
@@ -253,48 +286,67 @@ pub const Mnemonic = enum {
     @".cfi_escape",
     // zig fmt: off
     // General-purpose
-    adc, add, @"and",
-    bsf, bsr, bswap, bt, btc, btr, bts,
+    aaa, aad, aam, aas, adc, add, @"and", arpl,
+    bound, bsf, bsr, bswap, bt, btc, btr, bts,
     call, cbw, cdq, cdqe,
-    clac, clc, cld, clflush, cli, clts, clui,
+    clac, clc, cld, cldemote, clflush, clflushopt, cli, clts, clui, clrssbsy, clwb, cmc,
     cmova, cmovae, cmovb, cmovbe, cmovc, cmove, cmovg, cmovge, cmovl, cmovle, cmovna,
     cmovnae, cmovnb, cmovnbe, cmovnc, cmovne, cmovng, cmovnge, cmovnl, cmovnle, cmovno,
     cmovnp, cmovns, cmovnz, cmovo, cmovp, cmovpe, cmovpo, cmovs, cmovz,
-    cmp,
-    cmps, cmpsb, cmpsd, cmpsq, cmpsw,
-    cmpxchg, cmpxchg8b, cmpxchg16b,
+    cmp, cmps, cmpsb, cmpsd, cmpsq, cmpsw, cmpxchg, cmpxchg8b, cmpxchg16b,
     cpuid, cqo, cwd, cwde,
-    dec, div, idiv, imul, inc, int3,
-    ja, jae, jb, jbe, jc, jrcxz, je, jg, jge, jl, jle, jna, jnae, jnb, jnbe,
-    jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, js, jz,
-    jmp, 
-    lea, lfence,
+    daa, das, dec, div,
+    endbr32, endbr64, enqcmd, enqcmds, enter,
+    hlt, hreset,
+    idiv, imul, in, inc, incsspd, incsspq, ins, insb, insd, insw,
+    int, int1, int3, into, invd, invlpg, invpcid, iret, iretd, iretq, iretw,
+    ja, jae, jb, jbe, jc, jcxz, je, jecxz, jg, jge, jl, jle, jmp, jna, jnae, jnb, jnbe,
+    jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, jrcxz, js, jz,
+    lahf, lar, lea, leave, lfence, lgdt, lidt, lldt, lmsw, loop, loope, loopne,
     lods, lodsb, lodsd, lodsq, lodsw,
-    lzcnt,
+    lsl, ltr, lzcnt,
     mfence, mov, movbe,
     movs, movsb, movsd, movsq, movsw,
     movsx, movsxd, movzx, mul,
     neg, nop, not,
-    @"or",
-    pause, pop, popcnt, popfq, push, pushfq,
-    rcl, rcr, ret, rol, ror, rorx,
-    sal, sar, sarx, sbb,
+    @"or", out, outs, outsb, outsd, outsw,
+    pause, pop, popcnt, popf, popfd, popfq, push, pushfq,
+    rcl, rcr,
+    rdfsbase, rdgsbase, rdmsr, rdpid, rdpkru, rdpmc, rdrand, rdseed, rdssd, rdssq, rdtsc, rdtscp,
+    ret, rol, ror, rorx, rsm,
+    sahf, sal, sar, sarx, sbb,
     scas, scasb, scasd, scasq, scasw,
+    senduipi, serialize,
     shl, shld, shlx, shr, shrd, shrx,
-    stac, stc, std, sti, stui,
-    sub, syscall,
+    stac, stc, std, sti, str, stui,
+    sub, swapgs, syscall, sysenter, sysexit, sysret,
     seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae,
     setnb, setnbe, setnc, setne, setng, setnge, setnl, setnle, setno, setnp, setns,
     setnz, seto, setp, setpe, setpo, sets, setz,
-    sfence,
+    sfence, sidt, sldt, smsw,
     stos, stosb, stosd, stosq, stosw,
-    @"test", tzcnt,
-    ud2,
-    xadd, xchg, xgetbv, xor,
+    @"test", testui, tpause,
+    ud0, ud1, ud2, uiret, umonitor, umwait,
+    verr, verw, wrfsbase, wrgsbase, wrmsr, wrpkru, wrssd, wrssq, wrussd, wrussq,
+    xadd, xchg, xgetbv, xlat, xlatb, xor,
     // X87
-    fabs, fchs, ffree, fisttp, fld, fldenv, fnstenv, fst, fstenv, fstp,
+    f2xm1, fabs, fadd, faddp, fbld, fbstp, fchs, fclex,
+    fcmovb, fcmovbe, fcmove, fcmovnb, fcmovnbe, fcmovne, fcmovnu, fcmovu,
+    fcom, fcomi, fcomip, fcomp, fcompp, fcos,
+    fdecstp, fdiv, fdivp, fdivr, fdivrp, ffree,
+    fiadd, ficom, ficomp, fidiv, fidivr, fild, fimul, fincstp, finit,
+    fist, fistp, fisttp, fisub, fisubr,
+    fld, fld1, fldcw, fldenv, fldl2e, fldl2t, fldlg2, fldln2, fldpi, fldz,
+    fmul, fmulp,
+    fnclex, fninit, fnop, fnsave, fnstcw, fnstenv, fnstsw,
+    fpatan, fprem, fprem1, fptan, frndint, frstor,
+    fsave, fscale, fsin, fsincos, fsqrt,
+    fst, fstcw, fstenv, fstp, fstsw,
+    fsub, fsubp, fsubr, fsubrp,
+    ftst, fucom, fucomi, fucomip, fucomp, fucompp,
+    fwait, fxam, fxch, fxtract, fyl2x, fyl2xp1, wait,
     // MMX
-    movd, movq,
+    emms, movd, movq,
     packssdw, packsswb, packuswb,
     paddb, paddd, paddq, paddsb, paddsw, paddusb, paddusw, paddw,
     pand, pandn, por, pxor,
@@ -312,6 +364,7 @@ pub const Mnemonic = enum {
     cmpps, cmpss,
     cvtpi2ps, cvtps2pi, cvtsi2ss, cvtss2si, cvttps2pi, cvttss2si,
     divps, divss,
+    fxrstor, fxrstor64, fxsave, fxsave64,
     ldmxcsr,
     maxps, maxss,
     minps, minss,
@@ -333,10 +386,12 @@ pub const Mnemonic = enum {
     andpd,
     andnpd,
     cmppd, //cmpsd,
+    comisd, comiss,
     cvtdq2pd, cvtdq2ps, cvtpd2dq, cvtpd2pi, cvtpd2ps, cvtpi2pd,
     cvtps2dq, cvtps2pd, cvtsd2si, cvtsd2ss, cvtsi2sd, cvtss2sd,
     cvttpd2dq, cvttpd2pi, cvttps2dq, cvttsd2si,
     divpd, divsd,
+    gf2p8affineinvqb, gf2p8affineqb, gf2p8mulb,
     maxpd, maxsd,
     minpd, minsd,
     movapd,
@@ -357,11 +412,12 @@ pub const Mnemonic = enum {
     ucomisd,
     xorpd,
     // SSE3
-    movddup, movshdup, movsldup,
+    addsubpd, addsubps, haddpd, haddps, lddqu, movddup, movshdup, movsldup,
     // SSSE3
     pabsb, pabsd, pabsw, palignr, pshufb,
     // SSE4.1
     blendpd, blendps, blendvpd, blendvps,
+    dppd, dpps,
     extractps,
     insertps,
     packusdw,
@@ -376,28 +432,32 @@ pub const Mnemonic = enum {
     ptest,
     roundpd, roundps, roundsd, roundss,
     // SSE4.2
-    pcmpgtq,
+    crc32, pcmpgtq,
     // PCLMUL
     pclmulqdq,
     // AES
     aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist,
     // SHA
-    sha256msg1, sha256msg2, sha256rnds2,
+    sha1rnds4, sha1nexte, sha1msg1, sha1msg2, sha256msg1, sha256msg2, sha256rnds2,
     // AVX
-    vaddpd, vaddps, vaddsd, vaddss,
+    andn, bextr, blsi, blsmsk, blsr, bzhi, tzcnt,
+    vaddpd, vaddps, vaddsd, vaddss, vaddsubpd, vaddsubps,
     vaesdec, vaesdeclast, vaesenc, vaesenclast, vaesimc, vaeskeygenassist,
     vandnpd, vandnps, vandpd, vandps,
     vblendpd, vblendps, vblendvpd, vblendvps,
     vbroadcastf128, vbroadcastsd, vbroadcastss,
-    vcmppd, vcmpps, vcmpsd, vcmpss,
+    vcmppd, vcmpps, vcmpsd, vcmpss, vcomisd, vcomiss,
     vcvtdq2pd, vcvtdq2ps, vcvtpd2dq, vcvtpd2ps,
     vcvtps2dq, vcvtps2pd, vcvtsd2si, vcvtsd2ss,
     vcvtsi2sd, vcvtsi2ss, vcvtss2sd, vcvtss2si,
     vcvttpd2dq, vcvttps2dq, vcvttsd2si, vcvttss2si,
     vdivpd, vdivps, vdivsd, vdivss,
+    vdppd, vdpps,
     vextractf128, vextractps,
+    vgf2p8affineinvqb, vgf2p8affineqb, vgf2p8mulb,
+    vhaddpd, vhaddps,
     vinsertf128, vinsertps,
-    vldmxcsr,
+    vlddqu, vldmxcsr,
     vmaxpd, vmaxps, vmaxsd, vmaxss,
     vminpd, vminps, vminsd, vminss,
     vmovapd, vmovaps,
@@ -455,6 +515,12 @@ pub const Mnemonic = enum {
     // AVX2
     vbroadcasti128, vpbroadcastb, vpbroadcastd, vpbroadcastq, vpbroadcastw,
     vextracti128, vinserti128, vpblendd,
+    // ADX
+    adcx, adox,
+    // AESKLE
+    aesdec128kl, aesdec256kl, aesenc128kl, aesenc256kl, encodekey128, encodekey256, loadiwkey,
+    // AESKLEWIDE_KL
+    aesdecwide128kl, aesdecwide256kl, aesencwide128kl, aesencwide256kl,
     // zig fmt: on
 };
 
@@ -462,24 +528,23 @@ pub const OpEn = enum {
     // zig fmt: off
     z,
     o, zo, oz, oi,
-    i, zi,
+    i, zi, ii, ia,
     d, m,
     fd, td,
     m1, mc, mi, mr, rm,
     rmi, mri, mrc,
-    rm0, vmi, rvm, rvmr, rvmi, mvr, rmv,
+    rm0, vm, vmi, rvm, rvmr, rvmi, mvr, rmv,
     // zig fmt: on
 };
 
 pub const Op = enum {
     // zig fmt: off
     none,
-    o16, o32, o64,
     unity,
     imm8, imm16, imm32, imm64,
     imm8s, imm16s, imm32s,
     al, ax, eax, rax,
-    cl,
+    cl, dx,
     rip, eip, ip,
     r8, r16, r32, r64,
     rm8, rm16, rm32, rm64,
@@ -489,9 +554,10 @@ pub const Op = enum {
     m,
     moffs,
     sreg,
-    st, mm, mm_m64,
+    st0, st, mm, mm_m64,
     xmm0, xmm, xmm_m8, xmm_m16, xmm_m32, xmm_m64, xmm_m128,
     ymm, ymm_m256,
+    cr, dr,
     // zig fmt: on
 
     pub fn fromOperand(operand: Instruction.Operand, target: *const std.Target) Op {
@@ -499,32 +565,34 @@ pub const Op = enum {
             .none => .none,
 
             .reg => |reg| switch (reg.class()) {
-                .general_purpose => if (reg.to64() == .rax)
-                    switch (reg) {
-                        .al => .al,
-                        .ax => .ax,
-                        .eax => .eax,
-                        .rax => .rax,
+                .general_purpose => switch (reg) {
+                    .al => .al,
+                    .ax => .ax,
+                    .eax => .eax,
+                    .rax => .rax,
+                    .cl => .cl,
+                    .dx => .dx,
+                    else => switch (reg.bitSize()) {
+                        8 => .r8,
+                        16 => .r16,
+                        32 => .r32,
+                        64 => .r64,
                         else => unreachable,
-                    }
-                else if (reg == .cl)
-                    .cl
-                else switch (reg.bitSize()) {
-                    8 => .r8,
-                    16 => .r16,
-                    32 => .r32,
-                    64 => .r64,
-                    else => unreachable,
+                    },
                 },
                 .segment => .sreg,
-                .x87 => .st,
+                .x87 => switch (reg) {
+                    .st0 => .st0,
+                    else => .st,
+                },
                 .mmx => .mm,
-                .sse => if (reg == .xmm0)
-                    .xmm0
-                else switch (reg.bitSize()) {
-                    128 => .xmm,
-                    256 => .ymm,
-                    else => unreachable,
+                .sse => switch (reg) {
+                    .xmm0 => .xmm0,
+                    else => switch (reg.bitSize()) {
+                        128 => .xmm,
+                        256 => .ymm,
+                        else => unreachable,
+                    },
                 },
                 .ip => switch (reg) {
                     .rip => .rip,
@@ -532,6 +600,8 @@ pub const Op = enum {
                     .ip => .ip,
                     else => unreachable,
                 },
+                .cr => .cr,
+                .dr => .dr,
             },
 
             .mem => |mem| switch (mem) {
@@ -588,24 +658,27 @@ pub const Op = enum {
             .eax => .eax,
             .rax => .rax,
             .cl => .cl,
+            .dx => .dx,
             .rip => .rip,
             .eip => .eip,
             .ip => .ip,
+            .st0 => .st0,
             .xmm0 => .xmm0,
         };
     }
 
     pub fn immBitSize(op: Op) u64 {
         return switch (op) {
-            .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable,
-            .al, .cl, .rip, .eip, .ip, .r8, .rm8, .r32_m8 => unreachable,
+            .none, .moffs, .m, .sreg => unreachable,
+            .al, .cl, .dx, .rip, .eip, .ip, .r8, .rm8, .r32_m8 => unreachable,
             .ax, .r16, .rm16 => unreachable,
             .eax, .r32, .rm32, .r32_m16 => unreachable,
             .rax, .r64, .rm64, .r64_m16 => unreachable,
-            .st, .mm, .mm_m64 => unreachable,
+            .st0, .st, .mm, .mm_m64 => unreachable,
             .xmm0, .xmm, .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable,
             .ymm, .ymm_m256 => unreachable,
             .m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable,
+            .cr, .dr => unreachable,
             .unity => 1,
             .imm8, .imm8s, .rel8 => 8,
             .imm16, .imm16s, .rel16 => 16,
@@ -616,15 +689,15 @@ pub const Op = enum {
 
     pub fn regBitSize(op: Op) u64 {
         return switch (op) {
-            .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable,
+            .none, .moffs, .m, .sreg => unreachable,
             .unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable,
             .rel8, .rel16, .rel32 => unreachable,
             .m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable,
             .al, .cl, .r8, .rm8 => 8,
-            .ax, .ip, .r16, .rm16 => 16,
+            .ax, .dx, .ip, .r16, .rm16 => 16,
             .eax, .eip, .r32, .rm32, .r32_m8, .r32_m16 => 32,
-            .rax, .rip, .r64, .rm64, .r64_m16, .mm, .mm_m64 => 64,
-            .st => 80,
+            .rax, .rip, .r64, .rm64, .r64_m16, .mm, .mm_m64, .cr, .dr => 64,
+            .st0, .st => 80,
             .xmm0, .xmm, .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128 => 128,
             .ymm, .ymm_m256 => 256,
         };
@@ -632,11 +705,12 @@ pub const Op = enum {
 
     pub fn memBitSize(op: Op) u64 {
         return switch (op) {
-            .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable,
+            .none, .moffs, .m, .sreg => unreachable,
             .unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable,
             .rel8, .rel16, .rel32 => unreachable,
-            .al, .cl, .r8, .ax, .ip, .r16, .eax, .eip, .r32, .rax, .rip, .r64 => unreachable,
-            .st, .mm, .xmm0, .xmm, .ymm => unreachable,
+            .al, .cl, .r8, .ax, .dx, .ip, .r16, .eax, .eip, .r32, .rax, .rip, .r64 => unreachable,
+            .st0, .st, .mm, .xmm0, .xmm, .ymm => unreachable,
+            .cr, .dr => unreachable,
             .m8, .rm8, .r32_m8, .xmm_m8 => 8,
             .m16, .rm16, .r32_m16, .r64_m16, .xmm_m16 => 16,
             .m32, .rm32, .xmm_m32 => 32,
@@ -664,14 +738,15 @@ pub const Op = enum {
         // zig fmt: off
         return switch (op) {
             .al, .ax, .eax, .rax,
-            .cl,
+            .cl, .dx,
             .ip, .eip, .rip,
             .r8, .r16, .r32, .r64,
             .rm8, .rm16, .rm32, .rm64,
             .r32_m8, .r32_m16, .r64_m16,
-            .st, .mm, .mm_m64,
+            .st0, .st, .mm, .mm_m64,
             .xmm0, .xmm, .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128,
             .ymm, .ymm_m256,
+            .cr, .dr,
             => true,
             else => false,
         };
@@ -717,33 +792,34 @@ pub const Op = enum {
     pub fn class(op: Op) bits.Register.Class {
         return switch (op) {
             else => unreachable,
-            .al, .ax, .eax, .rax, .cl => .general_purpose,
+            .al, .ax, .eax, .rax, .cl, .dx => .general_purpose,
             .r8, .r16, .r32, .r64 => .general_purpose,
             .rm8, .rm16, .rm32, .rm64 => .general_purpose,
             .r32_m8, .r32_m16, .r64_m16 => .general_purpose,
             .sreg => .segment,
-            .st => .x87,
+            .st0, .st => .x87,
             .mm, .mm_m64 => .mmx,
             .xmm0, .xmm, .xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128 => .sse,
             .ymm, .ymm_m256 => .sse,
             .rip, .eip, .ip => .ip,
+            .cr => .cr,
+            .dr => .dr,
         };
     }
 
     /// Given an operand `op` checks if `target` is a subset for the purposes of the encoding.
     pub fn isSubset(op: Op, target: Op) bool {
         switch (op) {
-            .o16, .o32, .o64 => unreachable,
             .moffs, .sreg => return op == target,
             .none => switch (target) {
-                .o16, .o32, .o64, .none => return true,
+                .none => return true,
                 else => return false,
             },
             else => {
                 if (op.isRegister() and target.isRegister()) {
-                    return switch (target) {
-                        .cl, .al, .ax, .eax, .rax, .xmm0 => op == target,
-                        else => op.class() == target.class() and op.regBitSize() == target.regBitSize(),
+                    return switch (target.toReg()) {
+                        .none => op.class() == target.class() and op.regBitSize() == target.regBitSize(),
+                        else => op == target,
                     };
                 }
                 if (op.isMemory() and target.isMemory()) {
@@ -779,6 +855,7 @@ pub const Mode = enum {
     none,
     short, long,
     rex, rex_short,
+    wait,
     vex_128_w0, vex_128_w1, vex_128_wig,
     vex_256_w0, vex_256_w1, vex_256_wig,
     vex_lig_w0, vex_lig_w1, vex_lig_wig,
@@ -841,20 +918,46 @@ pub const Mode = enum {
 
 pub const Feature = enum {
     none,
+    @"32bit",
+    @"64bit",
+    adx,
     aes,
     @"aes avx",
     avx,
     avx2,
     bmi,
     bmi2,
+    cldemote,
+    clflushopt,
+    clwb,
     cmov,
+    @"cmov x87",
+    crc32,
+    enqcmd,
     f16c,
     fma,
+    fsgsbase,
+    fxsr,
+    gfni,
+    @"gfni avx",
+    hreset,
+    @"invpcid 32bit",
+    @"invpcid 64bit",
+    kl,
     lzcnt,
+    mmx,
     movbe,
     pclmul,
     @"pclmul avx",
+    pku,
     popcnt,
+    rdrnd,
+    rdseed,
+    @"rdpid 32bit",
+    @"rdpid 64bit",
+    sahf,
+    serialize,
+    shstk,
     smap,
     sse,
     sse2,
@@ -866,6 +969,8 @@ pub const Feature = enum {
     uintr,
     vaes,
     vpclmulqdq,
+    waitpkg,
+    widekl,
     x87,
 };
 
@@ -886,7 +991,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op
 }
 
 const mnemonic_to_encodings_map = init: {
-    @setEvalBranchQuota(5_000);
+    @setEvalBranchQuota(5_600);
     const mnemonic_count = @typeInfo(Mnemonic).@"enum".fields.len;
     var mnemonic_map: [mnemonic_count][]Data = @splat(&.{});
     const encodings = @import("encodings.zig");
src/arch/x86_64/encodings.zig
@@ -13,6 +13,16 @@ pub const Entry = struct { Mnemonic, OpEn, []const Op, []const u8, modrm_ext, Mo
 // zig fmt: off
 pub const table = [_]Entry{
     // General-purpose
+    .{ .aaa, .z, &.{}, &.{ 0x37 }, 0, .none, .@"32bit" },
+
+    .{ .aad, .z,  &.{       }, &.{ 0xd5, 0x0a }, 0, .none, .@"32bit" },
+    .{ .aad, .i,  &.{ .imm8 }, &.{ 0xd5       }, 0, .none, .@"32bit" },
+
+    .{ .aam, .z,  &.{       }, &.{ 0xd4, 0x0a }, 0, .none, .@"32bit" },
+    .{ .aam, .i,  &.{ .imm8 }, &.{ 0xd4       }, 0, .none, .@"32bit" },
+
+    .{ .aas, .z,  &.{}, &.{ 0x3f }, 0, .none, .@"32bit" },
+
     .{ .adc, .zi, &.{ .al,   .imm8   }, &.{ 0x14 }, 0, .none,  .none },
     .{ .adc, .zi, &.{ .ax,   .imm16  }, &.{ 0x15 }, 0, .short, .none },
     .{ .adc, .zi, &.{ .eax,  .imm32  }, &.{ 0x15 }, 0, .none,  .none },
@@ -82,6 +92,11 @@ pub const table = [_]Entry{
     .{ .@"and", .rm, &.{ .r32,  .rm32   }, &.{ 0x23 }, 0, .none,  .none },
     .{ .@"and", .rm, &.{ .r64,  .rm64   }, &.{ 0x23 }, 0, .long,  .none },
 
+    .{ .arpl, .mr, &.{ .rm16, .r16 }, &.{ 0x63 }, 0, .none, .@"32bit" },
+
+    .{ .bound, .rm, &.{ .r16, .m }, &.{ 0x62 }, 0, .short, .@"32bit" },
+    .{ .bound, .rm, &.{ .r32, .m }, &.{ 0x62 }, 0, .none,  .@"32bit" },
+
     .{ .bsf, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbc }, 0, .short, .none },
     .{ .bsf, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbc }, 0, .none,  .none },
     .{ .bsf, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbc }, 0, .long,  .none },
@@ -122,15 +137,12 @@ pub const table = [_]Entry{
     .{ .bts, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 5, .long,  .none },
 
     .{ .call, .d, &.{ .rel32 }, &.{ 0xe8 }, 0, .none, .none },
-    .{ .call, .m, &.{ .rm64  }, &.{ 0xff }, 2, .none, .none },
+    .{ .call, .m, &.{ .rm32  }, &.{ 0xff }, 2, .none, .@"32bit" },
+    .{ .call, .m, &.{ .rm64  }, &.{ 0xff }, 2, .none, .@"64bit" },
 
-    .{ .cbw,  .z, &.{ .o16 }, &.{ 0x98 }, 0, .short, .none },
-    .{ .cwde, .z, &.{ .o32 }, &.{ 0x98 }, 0, .none,  .none },
-    .{ .cdqe, .z, &.{ .o64 }, &.{ 0x98 }, 0, .long,  .none },
-
-    .{ .cwd, .z, &.{ .o16 }, &.{ 0x99 }, 0, .short, .none },
-    .{ .cdq, .z, &.{ .o32 }, &.{ 0x99 }, 0, .none,  .none },
-    .{ .cqo, .z, &.{ .o64 }, &.{ 0x99 }, 0, .long,  .none },
+    .{ .cbw,  .z, &.{}, &.{ 0x98 }, 0, .short, .none },
+    .{ .cwde, .z, &.{}, &.{ 0x98 }, 0, .none,  .none },
+    .{ .cdqe, .z, &.{}, &.{ 0x98 }, 0, .long,  .none },
 
     .{ .clac, .z, &.{}, &.{ 0x0f, 0x01, 0xca }, 0, .none, .smap },
 
@@ -138,14 +150,24 @@ pub const table = [_]Entry{
 
     .{ .cld, .z, &.{}, &.{ 0xfc }, 0, .none, .none },
 
+    .{ .cldemote, .m, &.{ .m8 }, &.{ 0x0f, 0x1c }, 0, .none, .cldemote },
+
     .{ .clflush, .m, &.{ .m8 }, &.{ 0x0f, 0xae }, 7, .none, .none },
 
+    .{ .clflushopt, .m, &.{ .m8 }, &.{ 0x66, 0x0f, 0xae }, 7, .none, .clflushopt },
+
     .{ .cli, .z, &.{}, &.{ 0xfa }, 0, .none, .none },
 
+    .{ .clrssbsy, .m, &.{ .m64 }, &.{ 0xf3, 0x0f, 0xae }, 6, .none, .shstk },
+
     .{ .clts, .z, &.{}, &.{ 0x0f, 0x06 }, 0, .none, .none },
 
     .{ .clui, .z, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xee }, 0, .none, .uintr },
 
+    .{ .clwb, .m, &.{ .m8 }, &.{ 0x66, 0x0f, 0xae }, 6, .none, .clwb },
+
+    .{ .cmc, .z, &.{}, &.{ 0xf5 }, 0, .none, .none },
+
     .{ .cmova,   .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .cmov },
     .{ .cmova,   .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none,  .cmov },
     .{ .cmova,   .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long,  .cmov },
@@ -264,11 +286,10 @@ pub const table = [_]Entry{
     .{ .cmps,  .z, &.{ .m16,  .m16  }, &.{ 0xa7 }, 0, .short, .none },
     .{ .cmps,  .z, &.{ .m32,  .m32  }, &.{ 0xa7 }, 0, .none,  .none },
     .{ .cmps,  .z, &.{ .m64,  .m64  }, &.{ 0xa7 }, 0, .long,  .none },
-
-    .{ .cmpsb, .z, &.{}, &.{ 0xa6 }, 0, .none,  .none },
-    .{ .cmpsw, .z, &.{}, &.{ 0xa7 }, 0, .short, .none },
-    .{ .cmpsd, .z, &.{}, &.{ 0xa7 }, 0, .none,  .none },
-    .{ .cmpsq, .z, &.{}, &.{ 0xa7 }, 0, .long,  .none },
+    .{ .cmpsb, .z, &.{              }, &.{ 0xa6 }, 0, .none,  .none },
+    .{ .cmpsw, .z, &.{              }, &.{ 0xa7 }, 0, .short, .none },
+    .{ .cmpsd, .z, &.{              }, &.{ 0xa7 }, 0, .none,  .none },
+    .{ .cmpsq, .z, &.{              }, &.{ 0xa7 }, 0, .long,  .none },
 
     .{ .cmpxchg, .mr, &.{ .rm8,  .r8  }, &.{ 0x0f, 0xb0 }, 0, .none,  .none },
     .{ .cmpxchg, .mr, &.{ .rm8,  .r8  }, &.{ 0x0f, 0xb0 }, 0, .rex,   .none },
@@ -281,6 +302,14 @@ pub const table = [_]Entry{
 
     .{ .cpuid, .z, &.{}, &.{ 0x0f, 0xa2 }, 0, .none, .none },
 
+    .{ .cwd, .z, &.{}, &.{ 0x99 }, 0, .short, .none },
+    .{ .cdq, .z, &.{}, &.{ 0x99 }, 0, .none,  .none },
+    .{ .cqo, .z, &.{}, &.{ 0x99 }, 0, .long,  .none },
+
+    .{ .daa, .z, &.{}, &.{ 0x27 }, 0, .none, .@"32bit" },
+
+    .{ .das, .z, &.{}, &.{ 0x2f }, 0, .none, .@"32bit" },
+
     .{ .dec, .m, &.{ .rm8  }, &.{ 0xfe }, 1, .none,  .none },
     .{ .dec, .m, &.{ .rm8  }, &.{ 0xfe }, 1, .rex,   .none },
     .{ .dec, .m, &.{ .rm16 }, &.{ 0xff }, 1, .short, .none },
@@ -293,26 +322,50 @@ pub const table = [_]Entry{
     .{ .div, .m, &.{ .rm32 }, &.{ 0xf7 }, 6, .none,  .none },
     .{ .div, .m, &.{ .rm64 }, &.{ 0xf7 }, 6, .long,  .none },
 
+    .{ .endbr32, .z, &.{}, &.{ 0xf3, 0x0f, 0x1e, 0xfb }, 0, .none, .none },
+
+    .{ .endbr64, .z, &.{}, &.{ 0xf3, 0x0f, 0x1e, 0xfa }, 0, .none, .none },
+
+    .{ .enqcmd, .rm, &.{ .r32, .m }, &.{ 0xf2, 0x0f, 0x38, 0xf8 }, 0, .none, .enqcmd },
+    .{ .enqcmd, .rm, &.{ .r64, .m }, &.{ 0xf2, 0x0f, 0x38, 0xf8 }, 0, .none, .enqcmd },
+
+    .{ .enqcmds, .rm, &.{ .r32, .m }, &.{ 0xf3, 0x0f, 0x38, 0xf8 }, 0, .none, .enqcmd },
+    .{ .enqcmds, .rm, &.{ .r64, .m }, &.{ 0xf3, 0x0f, 0x38, 0xf8 }, 0, .none, .enqcmd },
+
+    .{ .enter, .ii, &.{ .imm16, .imm8 }, &.{ 0xc8 }, 0, .none, .none },
+
+    .{ .hlt, .z, &.{}, &.{ 0xf4 }, 0, .none, .none },
+
+    .{ .hreset, .ia, &.{ .imm8       }, &.{ 0xf3, 0x0f, 0x3a, 0xf0 }, 0, .none, .hreset },
+    .{ .hreset, .ia, &.{ .imm8, .eax }, &.{ 0xf3, 0x0f, 0x3a, 0xf0 }, 0, .none, .hreset },
+
     .{ .idiv, .m, &.{ .rm8  }, &.{ 0xf6 }, 7, .none,  .none },
     .{ .idiv, .m, &.{ .rm8  }, &.{ 0xf6 }, 7, .rex,   .none },
     .{ .idiv, .m, &.{ .rm16 }, &.{ 0xf7 }, 7, .short, .none },
     .{ .idiv, .m, &.{ .rm32 }, &.{ 0xf7 }, 7, .none,  .none },
     .{ .idiv, .m, &.{ .rm64 }, &.{ 0xf7 }, 7, .long,  .none },
 
-    .{ .imul, .m,   &.{ .rm8                 }, &.{ 0xf6       }, 5, .none,  .none },
-    .{ .imul, .m,   &.{ .rm8                 }, &.{ 0xf6       }, 5, .rex,   .none },
-    .{ .imul, .m,   &.{ .rm16,               }, &.{ 0xf7       }, 5, .short, .none },
-    .{ .imul, .m,   &.{ .rm32,               }, &.{ 0xf7       }, 5, .none,  .none },
-    .{ .imul, .m,   &.{ .rm64,               }, &.{ 0xf7       }, 5, .long,  .none },
+    .{ .imul, .m,   &.{ .rm8                 }, &.{       0xf6 }, 5, .none,  .none },
+    .{ .imul, .m,   &.{ .rm8                 }, &.{       0xf6 }, 5, .rex,   .none },
+    .{ .imul, .m,   &.{ .rm16,               }, &.{       0xf7 }, 5, .short, .none },
+    .{ .imul, .m,   &.{ .rm32,               }, &.{       0xf7 }, 5, .none,  .none },
+    .{ .imul, .m,   &.{ .rm64,               }, &.{       0xf7 }, 5, .long,  .none },
     .{ .imul, .rm,  &.{ .r16,  .rm16,        }, &.{ 0x0f, 0xaf }, 0, .short, .none },
     .{ .imul, .rm,  &.{ .r32,  .rm32,        }, &.{ 0x0f, 0xaf }, 0, .none,  .none },
     .{ .imul, .rm,  &.{ .r64,  .rm64,        }, &.{ 0x0f, 0xaf }, 0, .long,  .none },
-    .{ .imul, .rmi, &.{ .r16,  .rm16, .imm8s }, &.{ 0x6b       }, 0, .short, .none },
-    .{ .imul, .rmi, &.{ .r32,  .rm32, .imm8s }, &.{ 0x6b       }, 0, .none,  .none },
-    .{ .imul, .rmi, &.{ .r64,  .rm64, .imm8s }, &.{ 0x6b       }, 0, .long,  .none },
-    .{ .imul, .rmi, &.{ .r16,  .rm16, .imm16 }, &.{ 0x69       }, 0, .short, .none },
-    .{ .imul, .rmi, &.{ .r32,  .rm32, .imm32 }, &.{ 0x69       }, 0, .none,  .none },
-    .{ .imul, .rmi, &.{ .r64,  .rm64, .imm32 }, &.{ 0x69       }, 0, .long,  .none },
+    .{ .imul, .rmi, &.{ .r16,  .rm16, .imm8s }, &.{       0x6b }, 0, .short, .none },
+    .{ .imul, .rmi, &.{ .r32,  .rm32, .imm8s }, &.{       0x6b }, 0, .none,  .none },
+    .{ .imul, .rmi, &.{ .r64,  .rm64, .imm8s }, &.{       0x6b }, 0, .long,  .none },
+    .{ .imul, .rmi, &.{ .r16,  .rm16, .imm16 }, &.{       0x69 }, 0, .short, .none },
+    .{ .imul, .rmi, &.{ .r32,  .rm32, .imm32 }, &.{       0x69 }, 0, .none,  .none },
+    .{ .imul, .rmi, &.{ .r64,  .rm64, .imm32 }, &.{       0x69 }, 0, .long,  .none },
+
+    .{ .in, .zi, &.{ .al,  .imm8 }, &.{ 0xe4 }, 0, .none,  .none },
+    .{ .in, .zi, &.{ .ax,  .imm8 }, &.{ 0xe5 }, 0, .short, .none },
+    .{ .in, .zi, &.{ .eax, .imm8 }, &.{ 0xe5 }, 0, .none,  .none },
+    .{ .in, .z,  &.{ .al,  .dx   }, &.{ 0xec }, 0, .none,  .none },
+    .{ .in, .z,  &.{ .ax,  .dx   }, &.{ 0xed }, 0, .short, .none },
+    .{ .in, .z,  &.{ .eax, .dx   }, &.{ 0xed }, 0, .none,  .none },
 
     .{ .inc, .m, &.{ .rm8  }, &.{ 0xfe }, 0, .none,  .none },
     .{ .inc, .m, &.{ .rm8  }, &.{ 0xfe }, 0, .rex,   .none },
@@ -320,58 +373,108 @@ pub const table = [_]Entry{
     .{ .inc, .m, &.{ .rm32 }, &.{ 0xff }, 0, .none,  .none },
     .{ .inc, .m, &.{ .rm64 }, &.{ 0xff }, 0, .long,  .none },
 
-    .{ .int3, .z, &.{}, &.{ 0xcc }, 0, .none, .none },
-
-    .{ .ja,    .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none, .none },
-    .{ .jae,   .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none },
-    .{ .jb,    .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none },
-    .{ .jbe,   .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none },
-    .{ .jc,    .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none },
-    .{ .jrcxz, .d, &.{ .rel32 }, &.{ 0xe3       }, 0, .none, .none },
-    .{ .je,    .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none },
-    .{ .jg,    .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none },
-    .{ .jge,   .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none },
-    .{ .jl,    .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none, .none },
-    .{ .jle,   .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none, .none },
-    .{ .jna,   .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none },
-    .{ .jnae,  .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none },
-    .{ .jnb,   .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none },
-    .{ .jnbe,  .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none, .none },
-    .{ .jnc,   .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none },
-    .{ .jne,   .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none, .none },
-    .{ .jng,   .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none, .none },
-    .{ .jnge,  .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none, .none },
-    .{ .jnl,   .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none },
-    .{ .jnle,  .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none },
-    .{ .jno,   .d, &.{ .rel32 }, &.{ 0x0f, 0x81 }, 0, .none, .none },
-    .{ .jnp,   .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none, .none },
-    .{ .jns,   .d, &.{ .rel32 }, &.{ 0x0f, 0x89 }, 0, .none, .none },
-    .{ .jnz,   .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none, .none },
-    .{ .jo,    .d, &.{ .rel32 }, &.{ 0x0f, 0x80 }, 0, .none, .none },
-    .{ .jp,    .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none, .none },
-    .{ .jpe,   .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none, .none },
-    .{ .jpo,   .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none, .none },
-    .{ .js,    .d, &.{ .rel32 }, &.{ 0x0f, 0x88 }, 0, .none, .none },
-    .{ .jz,    .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none },
+    .{ .incsspd, .m, &.{ .r32 }, &.{ 0xf3, 0x0f, 0xae }, 5, .none, .shstk },
+    .{ .incsspq, .m, &.{ .r64 }, &.{ 0xf3, 0x0f, 0xae }, 5, .long, .shstk },
+
+    .{ .ins,  .z, &.{ .m8,  .dx }, &.{ 0x6c }, 0, .none,  .none },
+    .{ .ins,  .z, &.{ .m16, .dx }, &.{ 0x6d }, 0, .short, .none },
+    .{ .ins,  .z, &.{ .m32, .dx }, &.{ 0x6d }, 0, .none,  .none },
+    .{ .insb, .z, &.{           }, &.{ 0x6c }, 0, .none,  .none },
+    .{ .insw, .z, &.{           }, &.{ 0x6d }, 0, .short, .none },
+    .{ .insd, .z, &.{           }, &.{ 0x6d }, 0, .none,  .none },
+
+    .{ .int3, .z, &.{       }, &.{ 0xcc }, 0, .none, .none     },
+    .{ .int,  .i, &.{ .imm8 }, &.{ 0xcd }, 0, .none, .none     },
+    .{ .into, .z, &.{       }, &.{ 0xce }, 0, .none, .@"32bit" },
+    .{ .int1, .z, &.{       }, &.{ 0xf1 }, 0, .none, .none     },
+
+    .{ .invd, .z, &.{}, &.{ 0x0f, 0x08 }, 0, .none, .none },
+
+    .{ .invlpg, .m, &.{ .m }, &.{ 0x0f, 0x01 }, 7, .none, .none },
+
+    .{ .invpcid, .rm, &.{ .r32, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x82 }, 0, .none, .@"invpcid 32bit" },
+    .{ .invpcid, .rm, &.{ .r64, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x82 }, 0, .none, .@"invpcid 64bit" },
+
+    .{ .iretw, .z, &.{}, &.{ 0xcf }, 0, .short, .none },
+    .{ .iretd, .z, &.{}, &.{ 0xcf }, 0, .none,  .none },
+    .{ .iret,  .z, &.{}, &.{ 0xcf }, 0, .none,  .none },
+    .{ .iretq, .z, &.{}, &.{ 0xcf }, 0, .long,  .none },
+
+    .{ .ja,    .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none,  .none     },
+    .{ .jae,   .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none,  .none     },
+    .{ .jb,    .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none,  .none     },
+    .{ .jbe,   .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none,  .none     },
+    .{ .jc,    .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none,  .none     },
+    .{ .jcxz,  .d, &.{ .rel8  }, &.{ 0xe3       }, 0, .short, .@"32bit" }, // E3 cb: only a rel8 form exists
+    .{ .jecxz, .d, &.{ .rel8  }, &.{ 0xe3       }, 0, .none,  .@"32bit" },
+    .{ .jrcxz, .d, &.{ .rel8  }, &.{ 0xe3       }, 0, .none,  .@"64bit" },
+    .{ .je,    .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none,  .none     },
+    .{ .jg,    .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none,  .none     },
+    .{ .jge,   .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none,  .none     },
+    .{ .jl,    .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none,  .none     },
+    .{ .jle,   .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none,  .none     },
+    .{ .jna,   .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none,  .none     },
+    .{ .jnae,  .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none,  .none     },
+    .{ .jnb,   .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none,  .none     },
+    .{ .jnbe,  .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none,  .none     },
+    .{ .jnc,   .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none,  .none     },
+    .{ .jne,   .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none,  .none     },
+    .{ .jng,   .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none,  .none     },
+    .{ .jnge,  .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none,  .none     },
+    .{ .jnl,   .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none,  .none     },
+    .{ .jnle,  .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none,  .none     },
+    .{ .jno,   .d, &.{ .rel32 }, &.{ 0x0f, 0x81 }, 0, .none,  .none     },
+    .{ .jnp,   .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none,  .none     },
+    .{ .jns,   .d, &.{ .rel32 }, &.{ 0x0f, 0x89 }, 0, .none,  .none     },
+    .{ .jnz,   .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none,  .none     },
+    .{ .jo,    .d, &.{ .rel32 }, &.{ 0x0f, 0x80 }, 0, .none,  .none     },
+    .{ .jp,    .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none,  .none     },
+    .{ .jpe,   .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none,  .none     },
+    .{ .jpo,   .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none,  .none     },
+    .{ .js,    .d, &.{ .rel32 }, &.{ 0x0f, 0x88 }, 0, .none,  .none     },
+    .{ .jz,    .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none,  .none     },
 
     .{ .jmp, .d, &.{ .rel32 }, &.{ 0xe9 }, 0, .none, .none },
     .{ .jmp, .m, &.{ .rm64  }, &.{ 0xff }, 4, .none, .none },
 
+    .{ .lahf, .z, &.{}, &.{ 0x9f }, 0, .none, .sahf },
+
+    .{ .lar, .rm, &.{ .r16, .rm16    }, &.{ 0x0f, 0x02 }, 0, .none, .none },
+    .{ .lar, .rm, &.{ .r32, .r32_m16 }, &.{ 0x0f, 0x02 }, 0, .none, .none },
+
     .{ .lea, .rm, &.{ .r16, .m }, &.{ 0x8d }, 0, .short, .none },
     .{ .lea, .rm, &.{ .r32, .m }, &.{ 0x8d }, 0, .none,  .none },
     .{ .lea, .rm, &.{ .r64, .m }, &.{ 0x8d }, 0, .long,  .none },
 
+    .{ .leave, .z, &.{}, &.{ 0xc9 }, 0, .none, .none },
+
     .{ .lfence, .z, &.{}, &.{ 0x0f, 0xae, 0xe8 }, 0, .none, .none },
 
+    .{ .lgdt, .m, &.{ .m }, &.{ 0x0f, 0x01 }, 2, .none, .none },
+    .{ .lidt, .m, &.{ .m }, &.{ 0x0f, 0x01 }, 3, .none, .none },
+
+    .{ .lldt, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 2, .none, .none },
+
+    .{ .lmsw, .m, &.{ .rm16 }, &.{ 0x0f, 0x01 }, 6, .none, .none },
+
     .{ .lods,  .z, &.{ .m8  }, &.{ 0xac }, 0, .none,  .none },
     .{ .lods,  .z, &.{ .m16 }, &.{ 0xad }, 0, .short, .none },
     .{ .lods,  .z, &.{ .m32 }, &.{ 0xad }, 0, .none,  .none },
     .{ .lods,  .z, &.{ .m64 }, &.{ 0xad }, 0, .long,  .none },
+    .{ .lodsb, .z, &.{      }, &.{ 0xac }, 0, .none,  .none },
+    .{ .lodsw, .z, &.{      }, &.{ 0xad }, 0, .short, .none },
+    .{ .lodsd, .z, &.{      }, &.{ 0xad }, 0, .none,  .none },
+    .{ .lodsq, .z, &.{      }, &.{ 0xad }, 0, .long,  .none },
+
+    .{ .loop,   .d, &.{ .rel8 }, &.{ 0xe2 }, 0, .none, .none },
+    .{ .loope,  .d, &.{ .rel8 }, &.{ 0xe1 }, 0, .none, .none },
+    .{ .loopne, .d, &.{ .rel8 }, &.{ 0xe0 }, 0, .none, .none },
 
-    .{ .lodsb, .z, &.{}, &.{ 0xac }, 0, .none,  .none },
-    .{ .lodsw, .z, &.{}, &.{ 0xad }, 0, .short, .none },
-    .{ .lodsd, .z, &.{}, &.{ 0xad }, 0, .none,  .none },
-    .{ .lodsq, .z, &.{}, &.{ 0xad }, 0, .long,  .none },
+    .{ .lsl, .rm, &.{ .r16, .rm16    }, &.{ 0x0f, 0x03 }, 0, .none, .none },
+    .{ .lsl, .rm, &.{ .r32, .r32_m16 }, &.{ 0x0f, 0x03 }, 0, .none, .none },
+    .{ .lsl, .rm, &.{ .r64, .r32_m16 }, &.{ 0x0f, 0x03 }, 0, .none, .none },
+
+    .{ .ltr, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 3, .none, .none },
 
     .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .lzcnt },
     .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none,  .lzcnt },
@@ -414,6 +517,16 @@ pub const table = [_]Entry{
     .{ .mov, .mi, &.{ .rm32,    .imm32   }, &.{ 0xc7 }, 0, .none,  .none },
     .{ .mov, .mi, &.{ .rm64,    .imm32s  }, &.{ 0xc7 }, 0, .long,  .none },
 
+    .{ .mov, .mr, &.{ .r32, .cr }, &.{ 0x0f, 0x20 }, 0, .none, .@"32bit" },
+    .{ .mov, .mr, &.{ .r64, .cr }, &.{ 0x0f, 0x20 }, 0, .none, .@"64bit" },
+    .{ .mov, .rm, &.{ .cr, .r32 }, &.{ 0x0f, 0x22 }, 0, .none, .@"32bit" },
+    .{ .mov, .rm, &.{ .cr, .r64 }, &.{ 0x0f, 0x22 }, 0, .none, .@"64bit" },
+
+    .{ .mov, .mr, &.{ .r32, .dr }, &.{ 0x0f, 0x21 }, 0, .none, .@"32bit" },
+    .{ .mov, .mr, &.{ .r64, .dr }, &.{ 0x0f, 0x21 }, 0, .none, .@"64bit" },
+    .{ .mov, .rm, &.{ .dr, .r32 }, &.{ 0x0f, 0x23 }, 0, .none, .@"32bit" },
+    .{ .mov, .rm, &.{ .dr, .r64 }, &.{ 0x0f, 0x23 }, 0, .none, .@"64bit" },
+
     .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .short, .movbe },
     .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none,  .movbe },
     .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long,  .movbe },
@@ -425,11 +538,10 @@ pub const table = [_]Entry{
     .{ .movs,  .z, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .short, .none },
     .{ .movs,  .z, &.{ .m32, .m32 }, &.{ 0xa5 }, 0, .none,  .none },
     .{ .movs,  .z, &.{ .m64, .m64 }, &.{ 0xa5 }, 0, .long,  .none },
-
-    .{ .movsb, .z, &.{}, &.{ 0xa4 }, 0, .none,  .none },
-    .{ .movsw, .z, &.{}, &.{ 0xa5 }, 0, .short, .none },
-    .{ .movsd, .z, &.{}, &.{ 0xa5 }, 0, .none,  .none },
-    .{ .movsq, .z, &.{}, &.{ 0xa5 }, 0, .long,  .none },
+    .{ .movsb, .z, &.{            }, &.{ 0xa4 }, 0, .none,  .none },
+    .{ .movsw, .z, &.{            }, &.{ 0xa5 }, 0, .short, .none },
+    .{ .movsd, .z, &.{            }, &.{ 0xa5 }, 0, .none,  .none },
+    .{ .movsq, .z, &.{            }, &.{ 0xa5 }, 0, .long,  .none },
 
     .{ .movsx, .rm, &.{ .r16, .rm8  }, &.{ 0x0f, 0xbe }, 0, .short,     .none },
     .{ .movsx, .rm, &.{ .r16, .rm8  }, &.{ 0x0f, 0xbe }, 0, .rex_short, .none },
@@ -441,8 +553,8 @@ pub const table = [_]Entry{
     .{ .movsx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xbf }, 0, .long,      .none },
 
     // This instruction is discouraged.
-    .{ .movsxd, .rm, &.{ .r32, .rm32 }, &.{ 0x63 }, 0, .none, .none },
-    .{ .movsxd, .rm, &.{ .r64, .rm32 }, &.{ 0x63 }, 0, .long, .none },
+    .{ .movsxd, .rm, &.{ .r32, .rm32 }, &.{ 0x63 }, 0, .none, .@"64bit" },
+    .{ .movsxd, .rm, &.{ .r64, .rm32 }, &.{ 0x63 }, 0, .long, .@"64bit" },
 
     .{ .movzx, .rm, &.{ .r16, .rm8  }, &.{ 0x0f, 0xb6 }, 0, .short,     .none },
     .{ .movzx, .rm, &.{ .r16, .rm8  }, &.{ 0x0f, 0xb6 }, 0, .rex_short, .none },
@@ -496,6 +608,20 @@ pub const table = [_]Entry{
     .{ .@"or", .rm, &.{ .r32,  .rm32   }, &.{ 0x0b }, 0, .none,  .none },
     .{ .@"or", .rm, &.{ .r64,  .rm64   }, &.{ 0x0b }, 0, .long,  .none },
 
+    .{ .out, .zi, &.{ .imm8, .al  }, &.{ 0xe6 }, 0, .none,  .none },
+    .{ .out, .zi, &.{ .imm8, .ax  }, &.{ 0xe7 }, 0, .short, .none },
+    .{ .out, .zi, &.{ .imm8, .eax }, &.{ 0xe7 }, 0, .none,  .none },
+    .{ .out, .z,  &.{ .dx,   .al  }, &.{ 0xee }, 0, .none,  .none },
+    .{ .out, .z,  &.{ .dx,   .ax  }, &.{ 0xef }, 0, .short, .none },
+    .{ .out, .z,  &.{ .dx,   .eax }, &.{ 0xef }, 0, .none,  .none },
+
+    .{ .outs,  .z, &.{ .dx, .m8  }, &.{ 0x6e }, 0, .none,  .none },
+    .{ .outs,  .z, &.{ .dx, .m16 }, &.{ 0x6f }, 0, .short, .none },
+    .{ .outs,  .z, &.{ .dx, .m32 }, &.{ 0x6f }, 0, .none,  .none },
+    .{ .outsb, .z, &.{           }, &.{ 0x6e }, 0, .none,  .none },
+    .{ .outsw, .z, &.{           }, &.{ 0x6f }, 0, .short, .none },
+    .{ .outsd, .z, &.{           }, &.{ 0x6f }, 0, .none,  .none },
+
     .{ .pause, .z, &.{}, &.{ 0xf3, 0x90 }, 0, .none, .none },
 
     .{ .pop, .o, &.{ .r16  }, &.{ 0x58 }, 0, .short, .none },
@@ -507,7 +633,9 @@ pub const table = [_]Entry{
     .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none,  .popcnt },
     .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long,  .popcnt },
 
-    .{ .popfq, .z, &.{}, &.{ 0x9d }, 0, .none, .none },
+    .{ .popf,  .z, &.{}, &.{ 0x9d }, 0, .short, .none },
+    .{ .popfd, .z, &.{}, &.{ 0x9d }, 0, .none,  .@"32bit" },
+    .{ .popfq, .z, &.{}, &.{ 0x9d }, 0, .none,  .@"64bit" },
 
     .{ .push, .o, &.{ .r16   }, &.{ 0x50 }, 0, .short, .none },
     .{ .push, .o, &.{ .r64   }, &.{ 0x50 }, 0, .none,  .none },
@@ -553,6 +681,35 @@ pub const table = [_]Entry{
     .{ .rcr, .mi, &.{ .rm32, .imm8  }, &.{ 0xc1 }, 3, .none,  .none },
     .{ .rcr, .mi, &.{ .rm64, .imm8  }, &.{ 0xc1 }, 3, .long,  .none },
 
+    .{ .rdfsbase, .m, &.{ .r32 }, &.{ 0xf3, 0x0f, 0xae }, 0, .none, .fsgsbase },
+    .{ .rdfsbase, .m, &.{ .r64 }, &.{ 0xf3, 0x0f, 0xae }, 0, .long, .fsgsbase },
+    .{ .rdgsbase, .m, &.{ .r32 }, &.{ 0xf3, 0x0f, 0xae }, 1, .none, .fsgsbase },
+    .{ .rdgsbase, .m, &.{ .r64 }, &.{ 0xf3, 0x0f, 0xae }, 1, .long, .fsgsbase },
+
+    .{ .rdmsr, .z, &.{}, &.{ 0x0f, 0x32 }, 0, .none, .none },
+
+    .{ .rdpid, .m, &.{ .r32 }, &.{ 0xf3, 0x0f, 0xc7 }, 7, .none, .@"rdpid 32bit" },
+    .{ .rdpid, .m, &.{ .r64 }, &.{ 0xf3, 0x0f, 0xc7 }, 7, .none, .@"rdpid 64bit" },
+
+    .{ .rdpkru, .z, &.{}, &.{ 0x0f, 0x01, 0xee }, 0, .none, .pku },
+
+    .{ .rdpmc, .z, &.{}, &.{ 0x0f, 0x33 }, 0, .none, .none },
+
+    .{ .rdrand, .m, &.{ .r16 }, &.{ 0x0f, 0xc7 }, 6, .short, .rdrnd },
+    .{ .rdrand, .m, &.{ .r32 }, &.{ 0x0f, 0xc7 }, 6, .none,  .rdrnd },
+    .{ .rdrand, .m, &.{ .r64 }, &.{ 0x0f, 0xc7 }, 6, .long,  .rdrnd },
+
+    .{ .rdseed, .m, &.{ .r16 }, &.{ 0x0f, 0xc7 }, 7, .short, .rdseed },
+    .{ .rdseed, .m, &.{ .r32 }, &.{ 0x0f, 0xc7 }, 7, .none,  .rdseed },
+    .{ .rdseed, .m, &.{ .r64 }, &.{ 0x0f, 0xc7 }, 7, .long,  .rdseed },
+
+    .{ .rdssd, .m, &.{ .r32 }, &.{ 0xf3, 0x0f, 0x1e }, 1, .none, .shstk },
+    .{ .rdssq, .m, &.{ .r64 }, &.{ 0xf3, 0x0f, 0x1e }, 1, .long, .shstk },
+
+    .{ .rdtsc, .z, &.{}, &.{ 0x0f, 0x31 }, 0, .none, .none },
+
+    .{ .rdtscp, .z, &.{}, &.{ 0x0f, 0x01, 0xf9 }, 0, .none, .none },
+
     .{ .rol, .m1, &.{ .rm8,  .unity }, &.{ 0xd0 }, 0, .none,  .none },
     .{ .rol, .m1, &.{ .rm8,  .unity }, &.{ 0xd0 }, 0, .rex,   .none },
     .{ .rol, .mc, &.{ .rm8,  .cl    }, &.{ 0xd2 }, 0, .none,  .none },
@@ -585,6 +742,10 @@ pub const table = [_]Entry{
     .{ .ror, .mi, &.{ .rm32, .imm8  }, &.{ 0xc1 }, 1, .none,  .none },
     .{ .ror, .mi, &.{ .rm64, .imm8  }, &.{ 0xc1 }, 1, .long,  .none },
 
+    .{ .rsm, .z, &.{}, &.{ 0x0f, 0xaa }, 0, .none, .none },
+
+    .{ .sahf, .z, &.{}, &.{ 0x9e }, 0, .none, .sahf },
+
     .{ .sal, .m1, &.{ .rm8,  .unity }, &.{ 0xd0 }, 4, .none,  .none },
     .{ .sal, .m1, &.{ .rm8,  .unity }, &.{ 0xd0 }, 4, .rex,   .none },
     .{ .sal, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .short, .none },
@@ -644,11 +805,14 @@ pub const table = [_]Entry{
     .{ .scas,  .z, &.{ .m16 }, &.{ 0xaf }, 0, .short, .none },
     .{ .scas,  .z, &.{ .m32 }, &.{ 0xaf }, 0, .none,  .none },
     .{ .scas,  .z, &.{ .m64 }, &.{ 0xaf }, 0, .long,  .none },
+    .{ .scasb, .z, &.{      }, &.{ 0xae }, 0, .none,  .none },
+    .{ .scasw, .z, &.{      }, &.{ 0xaf }, 0, .short, .none },
+    .{ .scasd, .z, &.{      }, &.{ 0xaf }, 0, .none,  .none },
+    .{ .scasq, .z, &.{      }, &.{ 0xaf }, 0, .long,  .none },
+
+    .{ .senduipi, .m, &.{ .r64 }, &.{ 0xf3, 0x0f, 0xc7 }, 6, .none, .uintr },
 
-    .{ .scasb, .z, &.{}, &.{ 0xae }, 0, .none,  .none },
-    .{ .scasw, .z, &.{}, &.{ 0xaf }, 0, .short, .none },
-    .{ .scasd, .z, &.{}, &.{ 0xaf }, 0, .none,  .none },
-    .{ .scasq, .z, &.{}, &.{ 0xaf }, 0, .long,  .none },
+    .{ .serialize, .z, &.{}, &.{ 0x0f, 0x01, 0xe8 }, 0, .none, .serialize },
 
     .{ .seta,   .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .none, .none },
     .{ .seta,   .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .rex,  .none },
@@ -713,6 +877,14 @@ pub const table = [_]Entry{
 
     .{ .sfence, .z, &.{}, &.{ 0x0f, 0xae, 0xf8 }, 0, .none, .none },
 
+    .{ .sidt, .m, &.{ .m }, &.{ 0x0f, 0x01 }, 1, .none, .none },
+
+    .{ .sldt, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 0, .none, .none },
+
+    .{ .smsw, .m, &.{ .rm16    }, &.{ 0x0f, 0x01 }, 4, .short, .none },
+    .{ .smsw, .m, &.{ .r32_m16 }, &.{ 0x0f, 0x01 }, 4, .none,  .none },
+    .{ .smsw, .m, &.{ .r64_m16 }, &.{ 0x0f, 0x01 }, 4, .long,  .none },
+
     .{ .shl, .m1, &.{ .rm8,  .unity }, &.{ 0xd0 }, 4, .none,  .none },
     .{ .shl, .m1, &.{ .rm8,  .unity }, &.{ 0xd0 }, 4, .rex,   .none },
     .{ .shl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .short, .none },
@@ -767,17 +939,18 @@ pub const table = [_]Entry{
 
     .{ .sti, .z, &.{}, &.{ 0xfb }, 0, .none, .none },
 
+    .{ .str, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 1, .none, .none },
+
     .{ .stui, .z, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xef }, 0, .none, .uintr },
 
     .{ .stos,  .z, &.{ .m8  }, &.{ 0xaa }, 0, .none,  .none },
     .{ .stos,  .z, &.{ .m16 }, &.{ 0xab }, 0, .short, .none },
     .{ .stos,  .z, &.{ .m32 }, &.{ 0xab }, 0, .none,  .none },
     .{ .stos,  .z, &.{ .m64 }, &.{ 0xab }, 0, .long,  .none },
-
-    .{ .stosb, .z, &.{}, &.{ 0xaa }, 0, .none,  .none },
-    .{ .stosw, .z, &.{}, &.{ 0xab }, 0, .short, .none },
-    .{ .stosd, .z, &.{}, &.{ 0xab }, 0, .none,  .none },
-    .{ .stosq, .z, &.{}, &.{ 0xab }, 0, .long,  .none },
+    .{ .stosb, .z, &.{      }, &.{ 0xaa }, 0, .none,  .none },
+    .{ .stosw, .z, &.{      }, &.{ 0xab }, 0, .short, .none },
+    .{ .stosd, .z, &.{      }, &.{ 0xab }, 0, .none,  .none },
+    .{ .stosq, .z, &.{      }, &.{ 0xab }, 0, .long,  .none },
 
     .{ .sub, .zi, &.{ .al,   .imm8   }, &.{ 0x2c }, 0, .none,  .none },
     .{ .sub, .zi, &.{ .ax,   .imm16  }, &.{ 0x2d }, 0, .short, .none },
@@ -802,7 +975,17 @@ pub const table = [_]Entry{
     .{ .sub, .rm, &.{ .r32,  .rm32   }, &.{ 0x2b }, 0, .none,  .none },
     .{ .sub, .rm, &.{ .r64,  .rm64   }, &.{ 0x2b }, 0, .long,  .none },
 
-    .{ .syscall, .z, &.{}, &.{ 0x0f, 0x05 }, 0, .none, .none },
+    .{ .swapgs, .z, &.{}, &.{ 0x0f, 0x01, 0xf8 }, 0, .none, .@"64bit" },
+
+    .{ .syscall, .z, &.{}, &.{ 0x0f, 0x05 }, 0, .none, .@"64bit" },
+
+    .{ .sysenter, .z, &.{}, &.{ 0x0f, 0x34 }, 0, .none, .none },
+
+    .{ .sysexit, .z, &.{}, &.{ 0x0f, 0x35 }, 0, .none, .none },
+    .{ .sysexit, .z, &.{}, &.{ 0x0f, 0x35 }, 0, .long, .none },
+
+    .{ .sysret, .z, &.{}, &.{ 0x0f, 0x07 }, 0, .none, .none }, // SYSRET opcode is 0F 07 per the Intel SDM
+    .{ .sysret, .z, &.{}, &.{ 0x0f, 0x07 }, 0, .long, .none },
 
     .{ .@"test", .zi, &.{ .al,   .imm8   }, &.{ 0xa8 }, 0, .none,  .none },
     .{ .@"test", .zi, &.{ .ax,   .imm16  }, &.{ 0xa9 }, 0, .short, .none },
@@ -819,12 +1002,38 @@ pub const table = [_]Entry{
     .{ .@"test", .mr, &.{ .rm32, .r32    }, &.{ 0x85 }, 0, .none,  .none },
     .{ .@"test", .mr, &.{ .rm64, .r64    }, &.{ 0x85 }, 0, .long,  .none },
 
-    .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .bmi },
-    .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none,  .bmi },
-    .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long,  .bmi },
+    .{ .testui, .z, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xed }, 0, .none, .uintr },
+
+    .{ .tpause, .m, &.{ .r32 }, &.{ 0x66, 0x0f, 0xae }, 6, .none, .waitpkg },
 
+    .{ .ud0, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xff }, 0, .none, .none },
+    .{ .ud1, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xb9 }, 0, .none, .none },
     .{ .ud2, .z, &.{}, &.{ 0x0f, 0x0b }, 0, .none, .none },
 
+    .{ .uiret, .z, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xec }, 0, .none, .uintr },
+
+    .{ .umonitor, .m, &.{ .r64 }, &.{ 0xf3, 0x0f, 0xae }, 6, .none, .waitpkg },
+
+    .{ .umwait, .m, &.{ .r32 }, &.{ 0xf2, 0x0f, 0xae }, 6, .none, .waitpkg },
+
+    .{ .verr, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 4, .none, .none },
+    .{ .verw, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 5, .none, .none },
+
+    .{ .wrfsbase, .m, &.{ .r32 }, &.{ 0xf3, 0x0f, 0xae }, 2, .none, .fsgsbase },
+    .{ .wrfsbase, .m, &.{ .r64 }, &.{ 0xf3, 0x0f, 0xae }, 2, .long, .fsgsbase },
+    .{ .wrgsbase, .m, &.{ .r32 }, &.{ 0xf3, 0x0f, 0xae }, 3, .none, .fsgsbase },
+    .{ .wrgsbase, .m, &.{ .r64 }, &.{ 0xf3, 0x0f, 0xae }, 3, .long, .fsgsbase },
+
+    .{ .wrmsr, .z, &.{}, &.{ 0x0f, 0x30 }, 0, .none, .none },
+
+    .{ .wrpkru, .z, &.{}, &.{ 0x0f, 0x01, 0xef }, 0, .none, .pku },
+
+    .{ .wrssd, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf6 }, 0, .none, .shstk },
+    .{ .wrssq, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf6 }, 0, .long, .shstk },
+
+    .{ .wrussd, .mr, &.{ .m32, .r32 }, &.{ 0x66, 0x0f, 0x38, 0xf5 }, 0, .none, .shstk },
+    .{ .wrussq, .mr, &.{ .m64, .r64 }, &.{ 0x66, 0x0f, 0x38, 0xf5 }, 0, .long, .shstk },
+
     .{ .xadd, .mr, &.{ .rm8,  .r8  }, &.{ 0x0f, 0xc0 }, 0, .none,  .none },
     .{ .xadd, .mr, &.{ .rm8,  .r8  }, &.{ 0x0f, 0xc0 }, 0, .rex,   .none },
     .{ .xadd, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xc1 }, 0, .short, .none },
@@ -850,6 +1059,11 @@ pub const table = [_]Entry{
 
     .{ .xgetbv, .z, &.{}, &.{ 0x0f, 0x01, 0xd0 }, 0, .none, .none },
 
+    .{ .xlat,  .z, &.{ .m8 }, &.{ 0xd7 }, 0, .none, .@"32bit" },
+    .{ .xlat,  .z, &.{ .m8 }, &.{ 0xd7 }, 0, .long, .@"64bit" },
+    .{ .xlatb, .z, &.{     }, &.{ 0xd7 }, 0, .none, .@"32bit" },
+    .{ .xlatb, .z, &.{     }, &.{ 0xd7 }, 0, .long, .@"64bit" },
+
     .{ .xor, .zi, &.{ .al,   .imm8   }, &.{ 0x34 }, 0, .none,  .none },
     .{ .xor, .zi, &.{ .ax,   .imm16  }, &.{ 0x35 }, 0, .short, .none },
     .{ .xor, .zi, &.{ .eax,  .imm32  }, &.{ 0x35 }, 0, .none,  .none },
@@ -874,12 +1088,96 @@ pub const table = [_]Entry{
     .{ .xor, .rm, &.{ .r64,  .rm64   }, &.{ 0x33 }, 0, .long,  .none },
 
     // X87
+    .{ .f2xm1, .z, &.{}, &.{ 0xd9, 0xf0 }, 0, .none, .x87 },
+
     .{ .fabs, .z, &.{}, &.{ 0xd9, 0xe1 }, 0, .none, .x87 },
 
+    .{ .fadd,  .m,  &.{ .m32      }, &.{ 0xd8       }, 0, .none, .x87 },
+    .{ .fadd,  .m,  &.{ .m64      }, &.{ 0xdc       }, 0, .none, .x87 },
+    .{ .fadd,  .zo, &.{ .st0, .st }, &.{ 0xd8, 0xc0 }, 0, .none, .x87 },
+    .{ .fadd,  .oz, &.{ .st, .st0 }, &.{ 0xdc, 0xc0 }, 0, .none, .x87 },
+    .{ .faddp, .oz, &.{ .st, .st0 }, &.{ 0xde, 0xc0 }, 0, .none, .x87 },
+    .{ .faddp, .z,  &.{           }, &.{ 0xde, 0xc1 }, 0, .none, .x87 },
+    .{ .fiadd, .m,  &.{ .m32      }, &.{ 0xda       }, 0, .none, .x87 },
+    .{ .fiadd, .m,  &.{ .m16      }, &.{ 0xde       }, 0, .none, .x87 },
+
+    .{ .fbld, .m, &.{ .m80 }, &.{ 0xdf }, 4, .none, .x87 },
+
+    .{ .fbstp, .m, &.{ .m80 }, &.{ 0xdf }, 6, .none, .x87 },
+
     .{ .fchs, .z, &.{}, &.{ 0xd9, 0xe0 }, 0, .none, .x87 },
 
+    .{ .fclex,  .z, &.{}, &.{ 0xdb, 0xe2 }, 0, .wait, .x87 },
+    .{ .fnclex, .z, &.{}, &.{ 0xdb, 0xe2 }, 0, .none, .x87 },
+
+    .{ .fcmovb,   .zo, &.{ .st0, .st }, &.{ 0xda, 0xc0 }, 0, .none, .@"cmov x87" },
+    .{ .fcmove,   .zo, &.{ .st0, .st }, &.{ 0xda, 0xc8 }, 0, .none, .@"cmov x87" },
+    .{ .fcmovbe,  .zo, &.{ .st0, .st }, &.{ 0xda, 0xd0 }, 0, .none, .@"cmov x87" },
+    .{ .fcmovu,   .zo, &.{ .st0, .st }, &.{ 0xda, 0xd8 }, 0, .none, .@"cmov x87" },
+    .{ .fcmovnb,  .zo, &.{ .st0, .st }, &.{ 0xdb, 0xc0 }, 0, .none, .@"cmov x87" },
+    .{ .fcmovne,  .zo, &.{ .st0, .st }, &.{ 0xdb, 0xc8 }, 0, .none, .@"cmov x87" },
+    .{ .fcmovnbe, .zo, &.{ .st0, .st }, &.{ 0xdb, 0xd0 }, 0, .none, .@"cmov x87" },
+    .{ .fcmovnu,  .zo, &.{ .st0, .st }, &.{ 0xdb, 0xd8 }, 0, .none, .@"cmov x87" },
+
+    .{ .fcom,   .m, &.{ .m32 }, &.{ 0xd8       }, 2, .none, .x87 },
+    .{ .fcom,   .m, &.{ .m64 }, &.{ 0xdc       }, 2, .none, .x87 },
+    .{ .fcom,   .o, &.{ .st  }, &.{ 0xd8, 0xd0 }, 0, .none, .x87 },
+    .{ .fcom,   .z, &.{      }, &.{ 0xd8, 0xd1 }, 0, .none, .x87 },
+    .{ .fcomp,  .m, &.{ .m32 }, &.{ 0xd8       }, 3, .none, .x87 },
+    .{ .fcomp,  .m, &.{ .m64 }, &.{ 0xdc       }, 3, .none, .x87 },
+    .{ .fcomp,  .o, &.{ .st  }, &.{ 0xd8, 0xd8 }, 0, .none, .x87 },
+    .{ .fcomp,  .z, &.{      }, &.{ 0xd8, 0xd9 }, 0, .none, .x87 },
+    .{ .fcompp, .z, &.{      }, &.{ 0xde, 0xd9 }, 0, .none, .x87 },
+
+    .{ .fcomi,   .zo, &.{ .st0, .st }, &.{ 0xdb, 0xf0 }, 0, .none, .x87 },
+    .{ .fcomip,  .zo, &.{ .st0, .st }, &.{ 0xdf, 0xf0 }, 0, .none, .x87 },
+    .{ .fucomi,  .zo, &.{ .st0, .st }, &.{ 0xdb, 0xe8 }, 0, .none, .x87 },
+    .{ .fucomip, .zo, &.{ .st0, .st }, &.{ 0xdf, 0xe8 }, 0, .none, .x87 },
+
+    .{ .fcos, .z, &.{}, &.{ 0xd9, 0xff }, 0, .none, .x87 },
+
+    .{ .fdecstp, .z, &.{}, &.{ 0xd9, 0xf6 }, 0, .none, .x87 },
+
+    .{ .fdiv,  .m,  &.{ .m32      }, &.{ 0xd8       }, 6, .none, .x87 },
+    .{ .fdiv,  .m,  &.{ .m64      }, &.{ 0xdc       }, 6, .none, .x87 },
+    .{ .fdiv,  .zo, &.{ .st0, .st }, &.{ 0xd8, 0xf0 }, 0, .none, .x87 },
+    .{ .fdiv,  .oz, &.{ .st, .st0 }, &.{ 0xdc, 0xf8 }, 0, .none, .x87 },
+    .{ .fdivp, .oz, &.{ .st, .st0 }, &.{ 0xde, 0xf8 }, 0, .none, .x87 },
+    .{ .fdivp, .z,  &.{           }, &.{ 0xde, 0xf9 }, 0, .none, .x87 },
+    .{ .fidiv, .m,  &.{ .m32      }, &.{ 0xda       }, 6, .none, .x87 },
+    .{ .fidiv, .m,  &.{ .m16      }, &.{ 0xde       }, 6, .none, .x87 },
+
+    .{ .fdivr,  .m,  &.{ .m32      }, &.{ 0xd8       }, 7, .none, .x87 },
+    .{ .fdivr,  .m,  &.{ .m64      }, &.{ 0xdc       }, 7, .none, .x87 },
+    .{ .fdivr,  .zo, &.{ .st0, .st }, &.{ 0xd8, 0xf8 }, 0, .none, .x87 },
+    .{ .fdivr,  .oz, &.{ .st, .st0 }, &.{ 0xdc, 0xf0 }, 0, .none, .x87 },
+    .{ .fdivrp, .oz, &.{ .st, .st0 }, &.{ 0xde, 0xf0 }, 0, .none, .x87 },
+    .{ .fdivrp, .z,  &.{           }, &.{ 0xde, 0xf1 }, 0, .none, .x87 },
+    .{ .fidivr, .m,  &.{ .m32      }, &.{ 0xda       }, 7, .none, .x87 },
+    .{ .fidivr, .m,  &.{ .m16      }, &.{ 0xde       }, 7, .none, .x87 },
+
     .{ .ffree, .o, &.{ .st }, &.{ 0xdd, 0xc0 }, 0, .none, .x87 },
 
+    .{ .ficom,  .m, &.{ .m16 }, &.{ 0xde }, 2, .none, .x87 },
+    .{ .ficom,  .m, &.{ .m32 }, &.{ 0xda }, 2, .none, .x87 },
+    .{ .ficomp, .m, &.{ .m16 }, &.{ 0xde }, 3, .none, .x87 },
+    .{ .ficomp, .m, &.{ .m32 }, &.{ 0xda }, 3, .none, .x87 },
+
+    .{ .fild, .m, &.{ .m16 }, &.{ 0xdf }, 0, .none, .x87 },
+    .{ .fild, .m, &.{ .m32 }, &.{ 0xdb }, 0, .none, .x87 },
+    .{ .fild, .m, &.{ .m64 }, &.{ 0xdf }, 5, .none, .x87 },
+
+    .{ .fincstp, .z, &.{}, &.{ 0xd9, 0xf7 }, 0, .none, .x87 },
+
+    .{ .finit,  .z, &.{}, &.{ 0xdb, 0xe3 }, 0, .wait, .x87 },
+    .{ .fninit, .z, &.{}, &.{ 0xdb, 0xe3 }, 0, .none, .x87 },
+
+    .{ .fist,  .m, &.{ .m16 }, &.{ 0xdf }, 2, .none, .x87 },
+    .{ .fist,  .m, &.{ .m32 }, &.{ 0xdb }, 2, .none, .x87 },
+    .{ .fistp, .m, &.{ .m16 }, &.{ 0xdf }, 3, .none, .x87 },
+    .{ .fistp, .m, &.{ .m32 }, &.{ 0xdb }, 3, .none, .x87 },
+    .{ .fistp, .m, &.{ .m64 }, &.{ 0xdf }, 7, .none, .x87 },
+
     .{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .none, .x87 },
     .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .none, .x87 },
     .{ .fisttp, .m, &.{ .m64 }, &.{ 0xdd }, 1, .none, .x87 },
@@ -889,8 +1187,52 @@ pub const table = [_]Entry{
     .{ .fld, .m, &.{ .m80 }, &.{ 0xdb       }, 5, .none, .x87 },
     .{ .fld, .o, &.{ .st  }, &.{ 0xd9, 0xc0 }, 0, .none, .x87 },
 
+    .{ .fld1,   .z, &.{}, &.{ 0xd9, 0xe8 }, 0, .none, .x87 },
+    .{ .fldl2t, .z, &.{}, &.{ 0xd9, 0xe9 }, 0, .none, .x87 },
+    .{ .fldl2e, .z, &.{}, &.{ 0xd9, 0xea }, 0, .none, .x87 },
+    .{ .fldpi,  .z, &.{}, &.{ 0xd9, 0xeb }, 0, .none, .x87 },
+    .{ .fldlg2, .z, &.{}, &.{ 0xd9, 0xec }, 0, .none, .x87 },
+    .{ .fldln2, .z, &.{}, &.{ 0xd9, 0xed }, 0, .none, .x87 },
+    .{ .fldz,   .z, &.{}, &.{ 0xd9, 0xee }, 0, .none, .x87 },
+
+    .{ .fldcw, .m, &.{ .m16 }, &.{ 0xd9 }, 5, .none, .x87 },
+
     .{ .fldenv, .m, &.{ .m }, &.{ 0xd9 }, 4, .none, .x87 },
 
+    .{ .fmul,  .m,  &.{ .m32      }, &.{ 0xd8       }, 1, .none, .x87 },
+    .{ .fmul,  .m,  &.{ .m64      }, &.{ 0xdc       }, 1, .none, .x87 },
+    .{ .fmul,  .zo, &.{ .st0, .st }, &.{ 0xd8, 0xc8 }, 0, .none, .x87 },
+    .{ .fmul,  .oz, &.{ .st, .st0 }, &.{ 0xdc, 0xc8 }, 0, .none, .x87 },
+    .{ .fmulp, .oz, &.{ .st, .st0 }, &.{ 0xde, 0xc8 }, 0, .none, .x87 },
+    .{ .fmulp, .z,  &.{           }, &.{ 0xde, 0xc9 }, 0, .none, .x87 },
+    .{ .fimul, .m,  &.{ .m32      }, &.{ 0xda       }, 1, .none, .x87 },
+    .{ .fimul, .m,  &.{ .m16      }, &.{ 0xde       }, 1, .none, .x87 },
+
+    .{ .fnop, .z, &.{}, &.{ 0xd9, 0xd0 }, 0, .none, .x87 },
+
+    .{ .fpatan, .z, &.{}, &.{ 0xd9, 0xf3 }, 0, .none, .x87 },
+
+    .{ .fprem, .z, &.{}, &.{ 0xd9, 0xf8 }, 0, .none, .x87 },
+
+    .{ .fprem1, .z, &.{}, &.{ 0xd9, 0xf5 }, 0, .none, .x87 },
+
+    .{ .fptan, .z, &.{}, &.{ 0xd9, 0xf2 }, 0, .none, .x87 },
+
+    .{ .frndint, .z, &.{}, &.{ 0xd9, 0xfc }, 0, .none, .x87 },
+
+    .{ .frstor, .m, &.{ .m }, &.{ 0xdd }, 4, .none, .x87 },
+
+    .{ .fsave,  .m, &.{ .m }, &.{ 0xdd }, 6, .wait, .x87 },
+    .{ .fnsave, .m, &.{ .m }, &.{ 0xdd }, 6, .none, .x87 },
+
+    .{ .fscale, .z, &.{}, &.{ 0xd9, 0xfd }, 0, .none, .x87 },
+
+    .{ .fsin, .z, &.{}, &.{ 0xd9, 0xfe }, 0, .none, .x87 },
+
+    .{ .fsincos, .z, &.{}, &.{ 0xd9, 0xfb }, 0, .none, .x87 },
+
+    .{ .fsqrt, .z, &.{}, &.{ 0xd9, 0xfa }, 0, .none, .x87 },
+
     .{ .fst,  .m, &.{ .m32 }, &.{ 0xd9       }, 2, .none, .x87 },
     .{ .fst,  .m, &.{ .m64 }, &.{ 0xdd       }, 2, .none, .x87 },
     .{ .fst,  .o, &.{ .st  }, &.{ 0xdd, 0xd0 }, 0, .none, .x87 },
@@ -899,8 +1241,59 @@ pub const table = [_]Entry{
     .{ .fstp, .m, &.{ .m80 }, &.{ 0xdb       }, 7, .none, .x87 },
     .{ .fstp, .o, &.{ .st  }, &.{ 0xdd, 0xd8 }, 0, .none, .x87 },
 
-    .{ .fstenv,  .m, &.{ .m }, &.{ 0x9b, 0xd9 }, 6, .none, .x87 },
-    .{ .fnstenv, .m, &.{ .m }, &.{       0xd9 }, 6, .none, .x87 },
+    .{ .fstcw,  .m, &.{ .m16 }, &.{ 0xd9 }, 7, .wait, .x87 },
+    .{ .fnstcw, .m, &.{ .m16 }, &.{ 0xd9 }, 7, .none, .x87 },
+
+    .{ .fstenv,  .m, &.{ .m }, &.{ 0xd9 }, 6, .wait, .x87 },
+    .{ .fnstenv, .m, &.{ .m }, &.{ 0xd9 }, 6, .none, .x87 },
+
+    .{ .fstsw,  .m, &.{ .m16 }, &.{ 0xdd }, 7, .wait, .x87 },
+    .{ .fstsw,  .m, &.{ .ax  }, &.{ 0xdf }, 4, .wait, .x87 },
+    .{ .fnstsw, .m, &.{ .m16 }, &.{ 0xdd }, 7, .none, .x87 },
+    .{ .fnstsw, .m, &.{ .ax  }, &.{ 0xdf }, 4, .none, .x87 },
+
+    .{ .fsub,  .m,  &.{ .m32      }, &.{ 0xd8       }, 4, .none, .x87 },
+    .{ .fsub,  .m,  &.{ .m64      }, &.{ 0xdc       }, 4, .none, .x87 },
+    .{ .fsub,  .zo, &.{ .st0, .st }, &.{ 0xd8, 0xe0 }, 0, .none, .x87 },
+    .{ .fsub,  .oz, &.{ .st, .st0 }, &.{ 0xdc, 0xe8 }, 0, .none, .x87 },
+    .{ .fsubp, .oz, &.{ .st, .st0 }, &.{ 0xde, 0xe8 }, 0, .none, .x87 },
+    .{ .fsubp, .z,  &.{           }, &.{ 0xde, 0xe9 }, 0, .none, .x87 },
+    .{ .fisub, .m,  &.{ .m32      }, &.{ 0xda       }, 4, .none, .x87 },
+    .{ .fisub, .m,  &.{ .m16      }, &.{ 0xde       }, 4, .none, .x87 },
+
+    .{ .fsubr,  .m,  &.{ .m32      }, &.{ 0xd8       }, 5, .none, .x87 },
+    .{ .fsubr,  .m,  &.{ .m64      }, &.{ 0xdc       }, 5, .none, .x87 },
+    .{ .fsubr,  .zo, &.{ .st0, .st }, &.{ 0xd8, 0xe8 }, 0, .none, .x87 },
+    .{ .fsubr,  .oz, &.{ .st, .st0 }, &.{ 0xdc, 0xe0 }, 0, .none, .x87 },
+    .{ .fsubrp, .oz, &.{ .st, .st0 }, &.{ 0xde, 0xe0 }, 0, .none, .x87 },
+    .{ .fsubrp, .z,  &.{           }, &.{ 0xde, 0xe1 }, 0, .none, .x87 },
+    .{ .fisubr, .m,  &.{ .m32      }, &.{ 0xda       }, 5, .none, .x87 },
+    .{ .fisubr, .m,  &.{ .m16      }, &.{ 0xde       }, 5, .none, .x87 },
+
+    .{ .ftst, .z, &.{}, &.{ 0xd9, 0xe4 }, 0, .none, .x87 },
+
+    .{ .fucom,   .o, &.{ .st }, &.{ 0xdd, 0xe0 }, 0, .none, .x87 },
+    .{ .fucom,   .z, &.{     }, &.{ 0xdd, 0xe1 }, 0, .none, .x87 },
+    .{ .fucomp,  .o, &.{ .st }, &.{ 0xdd, 0xe8 }, 0, .none, .x87 },
+    .{ .fucomp,  .z, &.{     }, &.{ 0xdd, 0xe9 }, 0, .none, .x87 },
+    .{ .fucompp, .z, &.{     }, &.{ 0xda, 0xe9 }, 0, .none, .x87 },
+
+    .{ .fxam, .z, &.{}, &.{ 0xd9, 0xe5 }, 0, .none, .x87 },
+
+    .{ .fxch, .o, &.{ .st }, &.{ 0xd9, 0xc8 }, 0, .none, .x87 },
+    .{ .fxch, .z, &.{     }, &.{ 0xd9, 0xc9 }, 0, .none, .x87 },
+
+    .{ .fxtract, .z, &.{}, &.{ 0xd9, 0xf4 }, 0, .none, .x87 },
+
+    .{ .fyl2x, .z, &.{}, &.{ 0xd9, 0xf1 }, 0, .none, .x87 },
+
+    .{ .fyl2xp1, .z, &.{}, &.{ 0xd9, 0xf9 }, 0, .none, .x87 },
+
+    .{ .wait,  .z, &.{}, &.{ 0x9b }, 0, .none, .x87 },
+    .{ .fwait, .z, &.{}, &.{ 0x9b }, 0, .none, .x87 },
+
+    // MMX
+    .{ .emms, .z, &.{}, &.{ 0x0f, 0x77 }, 0, .none, .mmx },
 
     // SSE
     .{ .addps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .none, .sse },
@@ -915,6 +1308,8 @@ pub const table = [_]Entry{
 
     .{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .none, .sse },
 
+    .{ .comiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2f }, 0, .none, .sse },
+
     .{ .cvtpi2ps, .rm, &.{ .xmm, .mm_m64 }, &.{ 0x0f, 0x2a }, 0, .none, .sse },
 
     .{ .cvtps2pi, .rm, &.{ .mm, .xmm_m64 }, &.{ 0x0f, 0x2d }, 0, .none, .sse },
@@ -934,6 +1329,12 @@ pub const table = [_]Entry{
 
     .{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .none, .sse },
 
+    .{ .fxrstor,   .m, &.{ .m }, &.{ 0x0f, 0xae }, 1, .none, .fxsr },
+    .{ .fxrstor64, .m, &.{ .m }, &.{ 0x0f, 0xae }, 1, .long, .fxsr },
+
+    .{ .fxsave,   .m, &.{ .m }, &.{ 0x0f, 0xae }, 0, .none, .fxsr },
+    .{ .fxsave64, .m, &.{ .m }, &.{ 0x0f, 0xae }, 0, .long, .fxsr },
+
     .{ .ldmxcsr, .m, &.{ .m32 }, &.{ 0x0f, 0xae }, 2, .none, .sse },
 
     .{ .maxps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5f }, 0, .none, .sse },
@@ -1004,6 +1405,8 @@ pub const table = [_]Entry{
 
     .{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .none, .sse2 },
 
+    .{ .comisd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x2f }, 0, .none, .sse2 },
+
     .{ .cvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .none, .sse2 },
 
     .{ .cvtdq2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5b }, 0, .none, .sse2 },
@@ -1043,6 +1446,12 @@ pub const table = [_]Entry{
 
     .{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .none, .sse2 },
 
+    .{ .gf2p8affineinvqb, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0xcf }, 0, .none, .gfni },
+
+    .{ .gf2p8affineqb, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0xce }, 0, .none, .gfni },
+
+    .{ .gf2p8mulb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xcf }, 0, .none, .gfni },
+
     .{ .maxpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .none, .sse2 },
 
     .{ .maxsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .none, .sse2 },
@@ -1203,6 +1612,16 @@ pub const table = [_]Entry{
     .{ .xorpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .none, .sse2 },
 
     // SSE3
+    .{ .addsubpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd0 }, 0, .none, .sse3 },
+
+    .{ .addsubps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0xd0 }, 0, .none, .sse3 },
+
+    .{ .haddpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x7c }, 0, .none, .sse3 },
+
+    .{ .haddps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0x7c }, 0, .none, .sse3 },
+
+    .{ .lddqu, .rm, &.{ .xmm, .m128 }, &.{ 0xf2, 0x0f, 0xf0 }, 0, .none, .sse3 },
+
     .{ .movddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .none, .sse3 },
 
     .{ .movshdup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .none, .sse3 },
@@ -1226,20 +1645,24 @@ pub const table = [_]Entry{
 
     .{ .blendps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .none, .sse4_1 },
 
-    .{ .blendvpd, .rm,  &.{ .xmm, .xmm_m128        }, &.{ 0x66, 0x0f, 0x38, 0x15 }, 0, .none, .sse4_1 },
+    .{ .blendvpd, .rm0, &.{ .xmm, .xmm_m128        }, &.{ 0x66, 0x0f, 0x38, 0x15 }, 0, .none, .sse4_1 },
     .{ .blendvpd, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x15 }, 0, .none, .sse4_1 },
 
-    .{ .blendvps, .rm,  &.{ .xmm, .xmm_m128        }, &.{ 0x66, 0x0f, 0x38, 0x14 }, 0, .none, .sse4_1 },
+    .{ .blendvps, .rm0, &.{ .xmm, .xmm_m128        }, &.{ 0x66, 0x0f, 0x38, 0x14 }, 0, .none, .sse4_1 },
     .{ .blendvps, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x14 }, 0, .none, .sse4_1 },
 
+    .{ .dppd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x41 }, 0, .none, .sse4_1 },
+
+    .{ .dpps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x40 }, 0, .none, .sse4_1 },
+
     .{ .extractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .none, .sse4_1 },
 
     .{ .insertps, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .none, .sse4_1 },
 
     .{ .packusdw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x2b }, 0, .none, .sse4_1 },
 
-    .{ .pblendvb, .rm, &.{ .xmm, .xmm_m128        }, &.{ 0x66, 0x0f, 0x38, 0x10 }, 0, .none, .sse4_1 },
-    .{ .pblendvb, .rm, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x10 }, 0, .none, .sse4_1 },
+    .{ .pblendvb, .rm0, &.{ .xmm, .xmm_m128        }, &.{ 0x66, 0x0f, 0x38, 0x10 }, 0, .none, .sse4_1 },
+    .{ .pblendvb, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x10 }, 0, .none, .sse4_1 },
 
     .{ .pblendw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0e }, 0, .none, .sse4_1 },
 
@@ -1296,6 +1719,13 @@ pub const table = [_]Entry{
     .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 },
 
     // SSE4.2
+    .{ .crc32, .rm, &.{ .r32, .rm8  }, &.{ 0xf2, 0x0f, 0x38, 0xf0 }, 0, .none,  .crc32 },
+    .{ .crc32, .rm, &.{ .r32, .rm8  }, &.{ 0xf2, 0x0f, 0x38, 0xf0 }, 0, .rex,   .crc32 },
+    .{ .crc32, .rm, &.{ .r32, .rm16 }, &.{ 0xf2, 0x0f, 0x38, 0xf1 }, 0, .short, .crc32 },
+    .{ .crc32, .rm, &.{ .r32, .rm32 }, &.{ 0xf2, 0x0f, 0x38, 0xf1 }, 0, .none,  .crc32 },
+    .{ .crc32, .rm, &.{ .r64, .rm8  }, &.{ 0xf2, 0x0f, 0x38, 0xf0 }, 0, .long,  .crc32 },
+    .{ .crc32, .rm, &.{ .r64, .rm64 }, &.{ 0xf2, 0x0f, 0x38, 0xf1 }, 0, .long,  .crc32 },
+
     .{ .pcmpgtq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .none, .sse4_2 },
 
     // PCLMUL
@@ -1315,14 +1745,40 @@ pub const table = [_]Entry{
     .{ .aeskeygenassist, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0xdf }, 0, .none, .aes },
 
     // SHA
-    .{ .sha256msg1, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xcc }, 0, .none, .sha },
+    .{ .sha1rnds4, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0x3a, 0xcc }, 0, .none, .sha },
 
-    .{ .sha256msg2, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xcd }, 0, .none, .sha },
+    .{ .sha1nexte, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xc8 }, 0, .none, .sha },
+
+    .{ .sha1msg1, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xc9 }, 0, .none, .sha },
+
+    .{ .sha1msg2, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xca }, 0, .none, .sha },
 
-    .{ .sha256rnds2, .rm,  &.{ .xmm, .xmm_m128        }, &.{ 0x0f, 0x38, 0xcb }, 0, .none, .sha },
+    .{ .sha256rnds2, .rm0, &.{ .xmm, .xmm_m128        }, &.{ 0x0f, 0x38, 0xcb }, 0, .none, .sha },
     .{ .sha256rnds2, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x0f, 0x38, 0xcb }, 0, .none, .sha },
 
+    .{ .sha256msg1, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xcc }, 0, .none, .sha },
+
+    .{ .sha256msg2, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xcd }, 0, .none, .sha },
+
     // AVX
+    .{ .andn, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w0, .bmi },
+    .{ .andn, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w1, .bmi },
+
+    .{ .bextr, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi },
+    .{ .bextr, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi },
+
+    .{ .blsi, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w0, .bmi },
+    .{ .blsi, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w1, .bmi },
+
+    .{ .blsmsk, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w0, .bmi },
+    .{ .blsmsk, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w1, .bmi },
+
+    .{ .blsr, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w0, .bmi },
+    .{ .blsr, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w1, .bmi },
+
+    .{ .bzhi, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 },
+    .{ .bzhi, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 },
+
     .{ .rorx, .rmi, &.{ .r32, .rm32, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w0, .bmi2 },
     .{ .rorx, .rmi, &.{ .r64, .rm64, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w1, .bmi2 },
 
@@ -1333,6 +1789,10 @@ pub const table = [_]Entry{
     .{ .shlx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
     .{ .shrx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
 
+    .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .bmi },
+    .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none,  .bmi },
+    .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long,  .bmi },
+
     .{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx },
     .{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx },
 
@@ -1343,6 +1803,12 @@ pub const table = [_]Entry{
 
     .{ .vaddss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx },
 
+    .{ .vaddsubpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd0 }, 0, .vex_128_wig, .avx },
+    .{ .vaddsubpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd0 }, 0, .vex_256_wig, .avx },
+
+    .{ .vaddsubps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0xd0 }, 0, .vex_128_wig, .avx },
+    .{ .vaddsubps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0xd0 }, 0, .vex_256_wig, .avx },
+
     .{ .vaesdec, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xde }, 0, .vex_128_wig, .@"aes avx" },
 
     .{ .vaesdeclast, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xdf }, 0, .vex_128_wig, .@"aes avx" },
@@ -1394,6 +1860,10 @@ pub const table = [_]Entry{
 
     .{ .vcmpss, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .vex_lig_wig, .avx },
 
+    .{ .vcomisd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x2f }, 0, .vex_lig_wig, .avx },
+
+    .{ .vcomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2f }, 0, .vex_lig_wig, .avx },
+
     .{ .vcvtdq2pd, .rm, &.{ .xmm, .xmm_m64  }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx },
     .{ .vcvtdq2pd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx },
 
@@ -1440,6 +1910,11 @@ pub const table = [_]Entry{
     .{ .vcvttss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .vex_lig_w0, .avx },
     .{ .vcvttss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .vex_lig_w1, .avx },
 
+    .{ .vdppd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x41 }, 0, .vex_128_wig, .avx },
+
+    .{ .vdpps, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x40 }, 0, .vex_128_wig, .avx },
+    .{ .vdpps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x40 }, 0, .vex_256_wig, .avx },
+
     .{ .vdivpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_128_wig, .avx },
     .{ .vdivpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_256_wig, .avx },
 
@@ -1454,10 +1929,28 @@ pub const table = [_]Entry{
 
     .{ .vextractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .vex_128_wig, .avx },
 
+    .{ .vgf2p8affineinvqb, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0xcf }, 0, .vex_128_w1, .@"gfni avx" },
+    .{ .vgf2p8affineinvqb, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0xcf }, 0, .vex_256_w1, .@"gfni avx" },
+
+    .{ .vgf2p8affineqb, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0xce }, 0, .vex_128_w1, .@"gfni avx" },
+    .{ .vgf2p8affineqb, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0xce }, 0, .vex_256_w1, .@"gfni avx" },
+
+    .{ .vgf2p8mulb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xcf }, 0, .vex_128_w0, .@"gfni avx" },
+    .{ .vgf2p8mulb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xcf }, 0, .vex_256_w0, .@"gfni avx" },
+
+    .{ .vhaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x7c }, 0, .vex_128_wig, .avx },
+    .{ .vhaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x7c }, 0, .vex_256_wig, .avx },
+
+    .{ .vhaddps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0x7c }, 0, .vex_128_wig, .avx },
+    .{ .vhaddps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0x7c }, 0, .vex_256_wig, .avx },
+
     .{ .vinsertf128, .rvmi, &.{ .ymm, .ymm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x18 }, 0, .vex_256_w0, .avx },
 
     .{ .vinsertps, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .vex_128_wig, .avx },
 
+    .{ .vlddqu, .rm, &.{ .xmm, .m128 }, &.{ 0xf2, 0x0f, 0xf0 }, 0, .vex_128_wig, .avx },
+    .{ .vlddqu, .rm, &.{ .ymm, .m256 }, &.{ 0xf2, 0x0f, 0xf0 }, 0, .vex_256_wig, .avx },
+
     .{ .vldmxcsr, .m, &.{ .m32 }, &.{ 0x0f, 0xae }, 2, .vex_lz_wig, .avx },
 
     .{ .vmaxpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_128_wig, .avx },
@@ -1821,15 +2314,6 @@ pub const table = [_]Entry{
     // VPCLMULQDQ
     .{ .vpclmulqdq, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x44 }, 0, .vex_256_wig, .vpclmulqdq },
 
-    // VAES
-    .{ .vaesdec, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xde }, 0, .vex_256_wig, .vaes },
-
-    .{ .vaesdeclast, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xdf }, 0, .vex_256_wig, .vaes },
-
-    .{ .vaesenc, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xdc }, 0, .vex_256_wig, .vaes },
-
-    .{ .vaesenclast, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xdd }, 0, .vex_256_wig, .vaes },
-
     // AVX2
     .{ .vbroadcastss, .rm, &.{ .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx2 },
     .{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
@@ -1992,5 +2476,46 @@ pub const table = [_]Entry{
     .{ .vpunpcklqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_256_wig, .avx2 },
 
     .{ .vpxor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xef }, 0, .vex_256_wig, .avx2 },
+
+    // ADX
+    .{ .adcx, .rm, &.{ .r32, .rm32 }, &.{ 0x66, 0x0f, 0x38, 0xf6 }, 0, .none, .adx },
+    .{ .adcx, .rm, &.{ .r64, .rm64 }, &.{ 0x66, 0x0f, 0x38, 0xf6 }, 0, .long, .adx },
+
+    .{ .adox, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0x38, 0xf6 }, 0, .none, .adx },
+    .{ .adox, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0x38, 0xf6 }, 0, .long, .adx },
+
+    // VAES
+    .{ .vaesdec, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xde }, 0, .vex_256_wig, .vaes },
+
+    .{ .vaesdeclast, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xdf }, 0, .vex_256_wig, .vaes },
+
+    .{ .vaesenc, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xdc }, 0, .vex_256_wig, .vaes },
+
+    .{ .vaesenclast, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xdd }, 0, .vex_256_wig, .vaes },
+
+    // AESKLE
+    .{ .aesdec128kl, .rm, &.{ .xmm, .m }, &.{ 0xf3, 0x0f, 0x38, 0xdd }, 0, .none, .kl },
+
+    .{ .aesdec256kl, .rm, &.{ .xmm, .m }, &.{ 0xf3, 0x0f, 0x38, 0xdf }, 0, .none, .kl },
+
+    .{ .aesenc128kl, .rm, &.{ .xmm, .m }, &.{ 0xf3, 0x0f, 0x38, 0xdc }, 0, .none, .kl },
+
+    .{ .aesenc256kl, .rm, &.{ .xmm, .m }, &.{ 0xf3, 0x0f, 0x38, 0xde }, 0, .none, .kl },
+
+    .{ .encodekey128, .rm, &.{ .r32, .r32 }, &.{ 0xf3, 0x0f, 0x38, 0xfa }, 0, .none, .kl },
+
+    .{ .encodekey256, .rm, &.{ .r32, .r32 }, &.{ 0xf3, 0x0f, 0x38, 0xfb }, 0, .none, .kl },
+
+    .{ .loadiwkey, .rm, &.{ .xmm, .xmm              }, &.{ 0xf3, 0x0f, 0x38, 0xdc }, 0, .none, .kl },
+    .{ .loadiwkey, .rm, &.{ .xmm, .xmm, .eax, .xmm0 }, &.{ 0xf3, 0x0f, 0x38, 0xdc }, 0, .none, .kl },
+
+    // AESKLEWIDE_KL
+    .{ .aesdecwide128kl, .m, &.{ .m }, &.{ 0xf3, 0x0f, 0x38, 0xd8 }, 1, .none, .widekl },
+
+    .{ .aesdecwide256kl, .m, &.{ .m }, &.{ 0xf3, 0x0f, 0x38, 0xd8 }, 3, .none, .widekl },
+
+    .{ .aesencwide128kl, .m, &.{ .m }, &.{ 0xf3, 0x0f, 0x38, 0xd8 }, 0, .none, .widekl },
+
+    .{ .aesencwide256kl, .m, &.{ .m }, &.{ 0xf3, 0x0f, 0x38, 0xd8 }, 2, .none, .widekl },
 };
 // zig fmt: on
src/arch/x86_64/Lower.zig
@@ -359,6 +359,8 @@ pub fn imm(lower: *const Lower, ops: Mir.Inst.Ops, i: u32) Immediate {
         .pseudo_dbg_local_ai_s,
         => .s(@bitCast(i)),
 
+        .ii,
+        .ir,
         .rrri,
         .rri_u,
         .ri_u,
@@ -548,17 +550,19 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
 }
 
 fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
+    @setEvalBranchQuota(2_400);
     const fixes = switch (inst.ops) {
         .none => inst.data.none.fixes,
         .inst => inst.data.inst.fixes,
         .i_s, .i_u => inst.data.i.fixes,
+        .ii => inst.data.ii.fixes,
         .r => inst.data.r.fixes,
         .rr => inst.data.rr.fixes,
         .rrr => inst.data.rrr.fixes,
         .rrrr => inst.data.rrrr.fixes,
         .rrri => inst.data.rrri.fixes,
         .rri_s, .rri_u => inst.data.rri.fixes,
-        .ri_s, .ri_u, .ri_64 => inst.data.ri.fixes,
+        .ri_s, .ri_u, .ri_64, .ir => inst.data.ri.fixes,
         .rm, .rmi_s, .mr => inst.data.rx.fixes,
         .mrr, .rrm, .rmr => inst.data.rrx.fixes,
         .rmi, .mri => inst.data.rix.fixes,
@@ -575,8 +579,6 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
         else
             .none,
     }, mnemonic: {
-        @setEvalBranchQuota(2_000);
-
         comptime var max_len = 0;
         inline for (@typeInfo(Mnemonic).@"enum".fields) |field| max_len = @max(field.name.len, max_len);
         var buf: [max_len]u8 = undefined;
@@ -598,6 +600,14 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
         .i_s, .i_u => &.{
             .{ .imm = lower.imm(inst.ops, inst.data.i.i) },
         },
+        .ii => &.{
+            .{ .imm = lower.imm(inst.ops, inst.data.ii.i1) },
+            .{ .imm = lower.imm(inst.ops, inst.data.ii.i2) },
+        },
+        .ir => &.{
+            .{ .imm = lower.imm(inst.ops, inst.data.ri.i) },
+            .{ .reg = inst.data.ri.r1 },
+        },
         .r => &.{
             .{ .reg = inst.data.r.r1 },
         },
src/arch/x86_64/Mir.zig
@@ -23,8 +23,82 @@ pub const Inst = struct {
         /// ___
         @"_",
 
+        /// ___ 0
+        _0,
+        /// ___ 1
+        _1,
+        /// ___ 2
+        _2,
+        /// ___ 3
+        _3,
+        /// ___ 4
+        _4,
+
+        /// System Call ___
+        sys_,
+
+        /// ___ crement Shadow Stack Pointer Doubleword
+        _csspd,
+        /// ___ crement Shadow Stack Pointer Quadword
+        _csspq,
+        /// ___ FS Segment Base
+        _fsbase,
+        /// ___ GS Segment Base
+        _gsbase,
+        /// ___ Model Specific Register
+        _msr,
+        /// ___ MXCSR
+        _mxcsr,
+        /// ___ Processor ID
+        _pid,
+        /// ___ Protection Key Rights For User Pages
+        _pkru,
+        /// ___ Performance-Monitoring Counters
+        _pmc,
+        /// ___ Random Number
+        _rand,
+        /// ___ Random Seed
+        _seed,
+        /// ___ Shadow Stack Pointer Doubleword
+        _sspd,
+        /// ___ Shadow Stack Pointer Quadword
+        _sspq,
+        /// ___ Time-Stamp Counter
+        _tsc,
+        /// ___ Time-Stamp Counter And Processor ID
+        _tscp,
+        /// VEX-Encoded ___ MXCSR
+        v_mxcsr,
+
+        /// Interrupt ___
         /// Integer ___
         i_,
+        /// Interrupt ___ Word
+        i_w,
+        /// Interrupt ___ Doubleword
+        i_d,
+        /// Interrupt ___ Quadword
+        i_q,
+        /// User-Interrupt ___
+        ui_,
+
+        /// ___ mp
+        _mp,
+        /// ___ if CX register is 0
+        _cxz,
+        /// ___ if ECX register is 0
+        _ecxz,
+        /// ___ if RCX register is 0
+        _rcxz,
+
+        /// ___ Addition
+        _a,
+        /// ___ Subtraction
+        _s,
+        /// ___ Multiply
+        _m,
+        /// ___ Division
+        _d,
 
         /// ___ Left
         _l,
@@ -33,6 +107,8 @@ pub const Inst = struct {
         /// ___ Left Without Affecting Flags
         _lx,
         /// ___ Right
+        /// ___ For Reading
+        /// ___ Register
         _r,
         /// ___ Right Double
         _rd,
@@ -45,7 +121,7 @@ pub const Inst = struct {
         //_r,
 
         /// ___ Above
-        _a,
+        //_a,
         /// ___ Above Or Equal
         _ae,
         /// ___ Below
@@ -102,7 +178,7 @@ pub const Inst = struct {
         /// ___ Parity Odd
         _po,
         /// ___ Sign
-        _s,
+        //_s,
         /// ___ Zero
         _z,
         /// ___ Alignment Check Flag
@@ -111,15 +187,18 @@ pub const Inst = struct {
         //_d,
         /// ___ Interrupt Flag
         _i,
+        /// ___ Task-Switched Flag In CR0
+        _ts,
         /// ___ User Interrupt Flag
         _ui,
 
         /// ___ Byte
         //_b,
         /// ___ Word
+        /// ___ For Writing
         _w,
         /// ___ Doubleword
-        _d,
+        //_d,
         /// ___ QuadWord
         _q,
 
@@ -214,8 +293,72 @@ pub const Inst = struct {
 
         /// Float ___
         f_,
+        /// Float ___ +1.0
+        /// Float ___ 1
+        f_1,
+        /// Float ___ Below
+        f_b,
+        /// Float ___ Below Or Equal
+        f_be,
+        /// Float ___ Control Word
+        f_cw,
+        /// Float ___ Equal
+        f_e,
+        /// Float ___ Environment
+        f_env,
+        /// Float ___ log_2(e)
+        f_l2e,
+        /// Float ___ log_2(10)
+        f_l2t,
+        /// Float ___ log_10(2)
+        f_lg2,
+        /// Float ___ log_e(2)
+        f_ln2,
+        /// Float ___ Not Below
+        f_nb,
+        /// Float ___ Not Below Or Equal
+        f_nbe,
+        /// Float ___ Not Equal
+        f_ne,
+        /// Float ___ Not Unordered
+        f_nu,
         /// Float ___ Pop
         f_p,
+        /// Float ___ +1
+        f_p1,
+        /// Float ___ π
+        f_pi,
+        /// Float ___ Pop Pop
+        f_pp,
+        /// Float ___ stack-top pointer
+        f_stp,
+        /// Float ___ Status Word
+        f_sw,
+        /// Float ___ Unordered
+        f_u,
+        /// Float ___ +0.0
+        f_z,
+        /// Float BCD ___
+        fb_,
+        /// Float BCD ___ Pop
+        fb_p,
+        /// Float And Integer ___
+        fi_,
+        /// Float And Integer ___ Pop
+        fi_p,
+        /// Float No Wait ___
+        fn_,
+        /// Float No Wait ___ Control Word
+        fn_cw,
+        /// Float No Wait ___ Environment
+        fn_env,
+        /// Float No Wait ___ status word
+        fn_sw,
+
+        /// ___ in 32-bit and Compatibility Mode
+        _32,
+        /// ___ in 64-bit Mode
+        _64,
 
         /// Packed ___
         p_,
@@ -243,6 +386,24 @@ pub const Inst = struct {
         /// ___ Packed Double-Precision Values
         _pd,
 
+        /// ___ Internal Caches
+        //_d,
+        /// ___ TLB Entries
+        _lpg,
+        /// ___ Process-Context Identifier
+        _pcid,
+
+        /// Load ___
+        l_,
+        /// Memory ___
+        m_,
+        /// Store ___
+        s_,
+        /// Timed ___
+        t_,
+        /// User Level Monitor ___
+        um_,
+
         /// VEX-Encoded ___
         v_,
         /// VEX-Encoded ___ Byte
@@ -282,6 +443,19 @@ pub const Inst = struct {
         /// VEX-Encoded ___ 128-Bits Of Floating-Point Data
         v_f128,
 
+        /// ___ 128-bit key with key locker
+        _128,
+        /// ___ 256-bit key with key locker
+        _256,
+        /// ___ with key locker using 128-bit key
+        _128kl,
+        /// ___ with key locker using 256-bit key
+        _256kl,
+        /// ___ with key locker on 8 blocks using 128-bit key
+        _wide128kl,
+        /// ___ with key locker on 8 blocks using 256-bit key
+        _wide256kl,
+
         /// Mask ___ Byte
         k_b,
         /// Mask ___ Word
@@ -300,6 +474,12 @@ pub const Inst = struct {
     };
 
     pub const Tag = enum(u8) {
+        // General-purpose
+        /// ASCII adjust al after addition
+        /// ASCII adjust ax before division
+        /// ASCII adjust ax after multiply
+        /// ASCII adjust al after subtraction
+        aa,
         /// Add with carry
         adc,
         /// Add
@@ -313,6 +493,8 @@ pub const Inst = struct {
         /// Bitwise logical and of packed single-precision floating-point values
         /// Bitwise logical and of packed double-precision floating-point values
         @"and",
+        /// Adjust RPL field of segment selector
+        arpl,
         /// Bit scan forward
         /// Bit scan reverse
         bs,
@@ -324,6 +506,7 @@ pub const Inst = struct {
         /// Bit test and set
         bt,
         /// Call
+        /// Fast system call
         call,
         /// Convert byte to word
         cbw,
@@ -331,12 +514,25 @@ pub const Inst = struct {
         cdq,
         /// Convert doubleword to quadword
         cdqe,
+        /// Clear AC flag in EFLAGS register
         /// Clear carry flag
         /// Clear direction flag
         /// Clear interrupt flag
+        /// Clear task-switched flag in CR0
+        /// Clear user interrupt flag
         cl,
+        /// Cache line demote
+        cldemote,
         /// Flush cache line
         clflush,
+        /// Flush cache line optimized
+        clflushopt,
+        /// Clear busy flag in a supervisor shadow stack token
+        clrssbsy,
+        /// Cache line write back
+        clwb,
+        /// Complement carry flag
+        cmc,
         /// Conditional move
         cmov,
         /// Logical compare
@@ -355,33 +551,79 @@ pub const Inst = struct {
         cwd,
         /// Convert word to doubleword
         cwde,
+        /// Decimal adjust AL after addition
+        /// Decimal adjust AL after subtraction
+        da,
         /// Decrement by 1
-        dec,
+        /// Decrement shadow stack pointer
+        de,
         /// Unsigned division
         /// Signed division
+        /// Divide
         /// Divide packed single-precision floating-point values
         /// Divide scalar single-precision floating-point values
         /// Divide packed double-precision floating-point values
         /// Divide scalar double-precision floating-point values
         div,
+        /// Terminate an indirect branch in 32-bit and compatibility mode
+        /// Terminate an indirect branch in 64-bit mode
+        endbr,
+        /// Enqueue command
+        /// Enqueue command supervisor
+        enqcmd,
+        /// Make stack frame for procedure parameters
+        /// Fast system call
+        enter,
+        /// Fast return from fast system call
+        exit,
+        /// Load fence
+        /// Memory fence
+        /// Store fence
+        fence,
+        /// Halt
+        hlt,
+        /// History reset
+        hreset,
+        /// Input from port
+        /// Input from port to string
         /// Increment by 1
-        inc,
+        /// Increment shadow stack pointer
+        in,
         /// Call to interrupt procedure
-        int3,
+        int,
+        /// Invalidate internal caches
+        /// Invalidate TLB entries
+        /// Invalidate process-context identifier
+        inv,
         /// Conditional jump
-        j,
         /// Jump
-        jmp,
+        j,
+        /// Load status flags into AH register
+        lahf,
+        /// Load access right byte
+        lar,
         /// Load effective address
         lea,
+        /// High level procedure exit
+        leave,
+        /// Load global descriptor table register
+        lgdt,
+        /// Load interrupt descriptor table register
+        lidt,
+        /// Load local descriptor table register
+        lldt,
+        /// Load machine status word
+        lmsw,
         /// Load string
         lod,
-        /// Load fence
-        lfence,
+        /// Loop according to ECX counter
+        loop,
+        /// Load segment limit
+        lsl,
+        /// Load task register
+        ltr,
         /// Count the number of leading zero bits
         lzcnt,
-        /// Memory fence
-        mfence,
         /// Move
         /// Move data from string to string
         /// Move scalar single-precision floating-point value
@@ -407,6 +649,7 @@ pub const Inst = struct {
         /// Two's complement negation
         neg,
         /// No-op
+        /// No operation
         nop,
         /// One's complement negation
         not,
@@ -414,39 +657,62 @@ pub const Inst = struct {
         /// Bitwise logical or of packed single-precision floating-point values
         /// Bitwise logical or of packed double-precision floating-point values
         @"or",
+        /// Output to port
+        /// Output string to port
+        out,
         /// Spin loop hint
+        /// Timed pause
         pause,
         /// Pop
         pop,
         /// Return the count of number of bits set to 1
         popcnt,
         /// Pop stack into EFLAGS register
-        popfq,
+        popf,
         /// Push
         push,
         /// Push EFLAGS register onto the stack
-        pushfq,
+        pushf,
         /// Rotate left through carry
         /// Rotate right through carry
         rc,
+        /// Read FS segment base
+        /// Read GS segment base
+        /// Read from model specific register
+        /// Read processor ID
+        /// Read protection key rights for user pages
+        /// Read performance-monitoring counters
+        /// Read random number
+        /// Read random seed
+        /// Read shadow stack pointer
+        /// Read time-stamp counter
+        /// Read time-stamp counter and processor ID
+        rd,
         /// Return
+        /// Return from fast system call
+        /// Interrupt return
+        /// User-interrupt return
         ret,
         /// Rotate left
         /// Rotate right
         /// Rotate right logical without affecting flags
         ro,
+        /// Resume from system management mode
+        rsm,
         /// Arithmetic shift left
         /// Arithmetic shift right
         /// Shift left arithmetic without affecting flags
         sa,
+        /// Store AH into flags
+        sahf,
         /// Integer subtraction with borrow
         sbb,
         /// Scan string
         sca,
+        /// Send user interprocessor interrupt
+        senduipi,
         /// Set byte on condition
         set,
-        /// Store fence
-        sfence,
         /// Logical shift left
         /// Double precision shift left
         /// Logical shift right
@@ -454,6 +720,12 @@ pub const Inst = struct {
         /// Shift left logical without affecting flags
         /// Shift right logical without affecting flags
         sh,
+        /// Store interrupt descriptor table register
+        sidt,
+        /// Store local descriptor table register
+        sldt,
+        /// Store machine status word
+        smsw,
         /// Subtract
         /// Subtract packed integers
         /// Subtract packed single-precision floating-point values
@@ -464,46 +736,128 @@ pub const Inst = struct {
         /// Set carry flag
         /// Set direction flag
         /// Set interrupt flag
+        /// Store binary coded decimal integer and pop
         /// Store floating-point value
+        /// Store integer
+        /// Store x87 FPU control word
+        /// Store x87 FPU environment
+        /// Store x87 FPU status word
+        /// Store MXCSR register state
         st,
         /// Store string
         sto,
-        /// Syscall
-        syscall,
+        /// Swap GS base register
+        swapgs,
         /// Test condition
         @"test",
-        /// Count the number of trailing zero bits
-        tzcnt,
         /// Undefined instruction
-        ud2,
+        ud,
+        /// User level set up monitor address
+        umonitor,
+        /// Verify a segment for reading
+        /// Verify a segment for writing
+        ver,
+        /// Write to model specific register
+        /// Write to model specific register
+        /// Write to model specific register
+        wr,
         /// Exchange and add
         xadd,
         /// Exchange register/memory with register
-        xchg,
+        /// Exchange register contents
+        xch,
         /// Get value of extended control register
         xgetbv,
+        /// Table look-up translation
+        xlat,
         /// Logical exclusive-or
         /// Bitwise logical xor of packed single-precision floating-point values
         /// Bitwise logical xor of packed double-precision floating-point values
         xor,
 
+        // X87
+        /// Compute 2^x-1
+        @"2xm1",
         /// Absolute value
         abs,
         /// Change sign
         chs,
+        /// Clear exceptions
+        clex,
+        /// Compare floating-point values
+        com,
+        /// Compare floating-point values and set EFLAGS
+        /// Compare scalar ordered single-precision floating-point values
+        /// Compare scalar ordered double-precision floating-point values
+        comi,
+        /// Cosine
+        cos,
+        /// Decrement stack-top pointer
+        decstp,
+        /// Reverse divide
+        divr,
         /// Free floating-point register
         free,
-        /// Store integer with truncation
-        istt,
+        /// Increment stack-top pointer
+        incstp,
+        /// Initialize floating-point unit
+        init,
+        /// Load binary coded decimal integer
         /// Load floating-point value
-        ld,
+        /// Load integer
+        /// Load constant
+        /// Load x87 FPU control word
         /// Load x87 FPU environment
-        ldenv,
-        /// Store x87 FPU environment
-        nstenv,
-        /// Store x87 FPU environment
-        stenv,
-
+        /// Load MXCSR register state
+        ld,
+        /// Partial arctangent
+        patan,
+        /// Partial remainder
+        prem,
+        /// Partial tangent
+        ptan,
+        /// Round to integer
+        rndint,
+        /// Restore x87 FPU state
+        rstor,
+        /// Store x87 FPU state
+        save,
+        /// Scale
+        scale,
+        /// Sine
+        sin,
+        /// Sine and cosine
+        sincos,
+        /// Square root
+        /// Square root of packed single-precision floating-point values
+        /// Square root of scalar single-precision floating-point value
+        /// Square root of packed double-precision floating-point values
+        /// Square root of scalar double-precision floating-point value
+        sqrt,
+        /// Store integer with truncation
+        stt,
+        /// Reverse subtract
+        subr,
+        /// Test
+        tst,
+        /// Unordered compare floating-point values
+        ucom,
+        /// Unordered compare floating-point values and set EFLAGS
+        /// Unordered compare scalar single-precision floating-point values
+        /// Unordered compare scalar double-precision floating-point values
+        ucomi,
+        /// Wait
+        /// User level monitor wait
+        wait,
+        /// Examine floating-point
+        xam,
+        /// Extract exponent and significand
+        xtract,
+        /// Compute y * log2x
+        /// Compute y * log2(x + 1)
+        yl2x,
+
+        // MMX
         /// Pack with signed saturation
         ackssw,
         /// Pack with signed saturation
@@ -514,6 +868,7 @@ pub const Inst = struct {
         adds,
         /// Add packed unsigned integers with unsigned saturation
         addus,
+        /// Logical and not
         /// Bitwise logical and not of packed single-precision floating-point values
         /// Bitwise logical and not of packed double-precision floating-point values
         andn,
@@ -521,18 +876,8 @@ pub const Inst = struct {
         cmpeq,
         /// Compare packed data for greater than
         cmpgt,
-        /// Maximum of packed signed integers
-        maxs,
-        /// Maximum of packed unsigned integers
-        maxu,
-        /// Minimum of packed signed integers
-        mins,
-        /// Minimum of packed unsigned integers
-        minu,
-        /// Move byte mask
-        /// Extract packed single precision floating-point sign mask
-        /// Extract packed double precision floating-point sign mask
-        movmsk,
+        /// Empty MMX technology state
+        emms,
         /// Multiply packed signed integers and store low result
         mull,
         /// Multiply packed signed integers and store high result
@@ -547,12 +892,20 @@ pub const Inst = struct {
         subs,
         /// Subtract packed unsigned integers with unsigned saturation
         subus,
+        /// Unpack high data
+        unpckhbw,
+        /// Unpack high data
+        unpckhdq,
+        /// Unpack high data
+        unpckhwd,
+        /// Unpack low data
+        unpcklbw,
+        /// Unpack low data
+        unpckldq,
+        /// Unpack low data
+        unpcklwd,
 
-        /// Load MXCSR register
-        ldmxcsr,
-        /// Store MXCSR register state
-        stmxcsr,
-
+        // SSE
         /// Convert packed doubleword integers to packed single-precision floating-point values
         /// Convert packed doubleword integers to packed double-precision floating-point values
         cvtpi2,
@@ -567,17 +920,38 @@ pub const Inst = struct {
         cvttps2pi,
         /// Convert with truncation scalar single-precision floating-point value to doubleword integer
         cvttss2si,
-
+        /// Extract byte
+        /// Extract word
+        /// Extract doubleword
+        /// Extract quadword
+        extr,
+        /// Restore x87 FPU, MMX, XMM, and MXCSR state
+        fxrstor,
+        /// Save x87 FPU, MMX technology, and MXCSR state
+        fxsave,
+        /// Insert byte
+        /// Insert word
+        /// Insert doubleword
+        /// Insert quadword
+        insr,
         /// Maximum of packed single-precision floating-point values
         /// Maximum of scalar single-precision floating-point values
         /// Maximum of packed double-precision floating-point values
         /// Maximum of scalar double-precision floating-point values
         max,
+        /// Maximum of packed signed integers
+        maxs,
+        /// Maximum of packed unsigned integers
+        maxu,
         /// Minimum of packed single-precision floating-point values
         /// Minimum of scalar single-precision floating-point values
         /// Minimum of packed double-precision floating-point values
         /// Minimum of scalar double-precision floating-point values
         min,
+        /// Minimum of packed signed integers
+        mins,
+        /// Minimum of packed unsigned integers
+        minu,
         /// Move aligned packed single-precision floating-point values
         /// Move aligned packed double-precision floating-point values
         mova,
@@ -591,27 +965,18 @@ pub const Inst = struct {
         movl,
         /// Move packed single-precision floating-point values low to high
         movlh,
+        /// Move byte mask
+        /// Extract packed single-precision floating-point sign mask
+        /// Extract packed double-precision floating-point sign mask
+        movmsk,
         /// Move unaligned packed single-precision floating-point values
         /// Move unaligned packed double-precision floating-point values
         movu,
-        /// Extract byte
-        /// Extract word
-        /// Extract doubleword
-        /// Extract quadword
-        extr,
-        /// Insert byte
-        /// Insert word
-        /// Insert doubleword
-        /// Insert quadword
-        insr,
-        /// Square root of packed single-precision floating-point values
-        /// Square root of scalar single-precision floating-point value
-        /// Square root of packed double-precision floating-point values
-        /// Square root of scalar double-precision floating-point value
-        sqrt,
-        /// Unordered compare scalar single-precision floating-point values
-        /// Unordered compare scalar double-precision floating-point values
-        ucomi,
+        /// Packed interleave shuffle of quadruplets of single-precision floating-point values
+        /// Packed interleave shuffle of pairs of double-precision floating-point values
+        /// Shuffle packed doublewords
+        /// Shuffle packed words
+        shuf,
         /// Unpack and interleave high packed single-precision floating-point values
         /// Unpack and interleave high packed double-precision floating-point values
         unpckh,
@@ -619,6 +984,7 @@ pub const Inst = struct {
         /// Unpack and interleave low packed double-precision floating-point values
         unpckl,
 
+        // SSE2
         /// Convert packed doubleword integers to packed single-precision floating-point values
         /// Convert packed doubleword integers to packed double-precision floating-point values
         cvtdq2,
@@ -646,32 +1012,28 @@ pub const Inst = struct {
         cvttps2dq,
         /// Convert with truncation scalar double-precision floating-point value to doubleword integer
         cvttsd2si,
-        /// Packed interleave shuffle of quadruplets of single-precision floating-point values
-        /// Packed interleave shuffle of pairs of double-precision floating-point values
-        /// Shuffle packed doublewords
-        /// Shuffle packed words
-        shuf,
+        /// Galois field affine transformation inverse
+        gf2p8affineinvq,
+        /// Galois field affine transformation
+        gf2p8affineq,
+        /// Galois field multiply bytes
+        gf2p8mul,
         /// Shuffle packed high words
         shufh,
         /// Shuffle packed low words
         shufl,
         /// Unpack high data
-        unpckhbw,
-        /// Unpack high data
-        unpckhdq,
-        /// Unpack high data
         unpckhqdq,
-        /// Unpack high data
-        unpckhwd,
-        /// Unpack low data
-        unpcklbw,
-        /// Unpack low data
-        unpckldq,
         /// Unpack low data
         unpcklqdq,
-        /// Unpack low data
-        unpcklwd,
 
+        // SSE3
+        /// Packed single-precision floating-point add/subtract
+        /// Packed double-precision floating-point add/subtract
+        addsub,
+        /// Packed single-precision floating-point horizontal add
+        /// Packed double-precision floating-point horizontal add
+        hadd,
         /// Replicate double floating-point values
         movddup,
         /// Replicate single floating-point values
@@ -679,9 +1041,11 @@ pub const Inst = struct {
         /// Replicate single floating-point values
         movsldup,
 
+        // SSSE3
         /// Packed align right
         alignr,
 
+        // SSE4.1
         /// Pack with unsigned saturation
         ackusd,
         /// Blend packed single-precision floating-point values
@@ -694,6 +1058,9 @@ pub const Inst = struct {
         /// Variable blend packed double-precision floating-point values
         /// Variable blend scalar double-precision floating-point values
         blendv,
+        /// Dot product of packed single-precision floating-point values
+        /// Dot product of packed double-precision floating-point values
+        dp,
         /// Extract packed floating-point values
         /// Extract packed integer values
         extract,
@@ -714,14 +1081,28 @@ pub const Inst = struct {
         /// Round scalar double-precision floating-point value
         round,
 
+        // SSE4.2
+        /// Accumulate CRC32 value
+        crc32,
+
+        // PCLMUL
         /// Carry-less multiplication quadword
         clmulq,
 
+        // AES
         /// Perform one round of an AES decryption flow
+        /// Perform ten rounds of AES decryption flow with key locker using 128-bit key
+        /// Perform 14 rounds of AES decryption flow with key locker using 256-bit key
+        /// Perform ten rounds of AES decryption flow with key locker on 8 blocks using 128-bit key
+        /// Perform 14 rounds of AES decryption flow with key locker on 8 blocks using 256-bit key
         aesdec,
         /// Perform last round of an AES decryption flow
         aesdeclast,
         /// Perform one round of an AES encryption flow
+        /// Perform ten rounds of AES encryption flow with key locker using 128-bit key
+        /// Perform 14 rounds of AES encryption flow with key locker using 256-bit key
+        /// Perform ten rounds of AES encryption flow with key locker on 8 blocks using 128-bit key
+        /// Perform 14 rounds of AES encryption flow with key locker on 8 blocks using 256-bit key
         aesenc,
         /// Perform last round of an AES encryption flow
         aesenclast,
@@ -730,22 +1111,42 @@ pub const Inst = struct {
         /// AES round key generation assist
         aeskeygenassist,
 
+        // SHA
+        /// Perform four rounds of SHA1 operation
+        sha1rnds,
+        /// Calculate SHA1 state variable E after four rounds
+        sha1nexte,
+        /// Perform an intermediate calculation for the next four SHA1 message dwords
+        /// Perform a final calculation for the next four SHA1 message dwords
+        sha1msg,
         /// Perform an intermediate calculation for the next four SHA256 message dwords
-        sha256msg1,
         /// Perform a final calculation for the next four SHA256 message dwords
-        sha256msg2,
+        sha256msg,
         /// Perform two rounds of SHA256 operation
-        sha256rnds2,
-
+        sha256rnds,
+
+        // AVX
+        /// Bit field extract
+        bextr,
+        /// Extract lowest set isolated bit
+        /// Get mask up to lowest set bit
+        /// Reset lowest set bit
+        bls,
         /// Load with broadcast floating-point data
         /// Load integer and broadcast
         broadcast,
+        /// Zero high bits starting with specified bit position
+        bzhi,
+        /// Count the number of trailing zero bits
+        tzcnt,
 
+        // F16C
         /// Convert 16-bit floating-point values to single-precision floating-point values
         cvtph2,
         /// Convert single-precision floating-point values to 16-bit floating-point values
         cvtps2ph,
 
+        // FMA
         /// Fused multiply-add of packed single-precision floating-point values
         /// Fused multiply-add of scalar single-precision floating-point values
         /// Fused multiply-add of packed double-precision floating-point values
@@ -762,6 +1163,19 @@ pub const Inst = struct {
         /// Fused multiply-add of scalar double-precision floating-point values
         fmadd231,
 
+        // ADX
+        /// Unsigned integer addition of two operands with carry flag
+        adcx,
+        /// Unsigned integer addition of two operands with overflow flag
+        adox,
+
+        // AESKLE
+        /// Encode 128-bit key with key locker
+        /// Encode 256-bit key with key locker
+        encodekey,
+        /// Load internal wrapping key with key locker
+        loadiwkey,
+
         /// A pseudo instruction that requires special lowering.
         /// This should be the only tag in this enum that doesn't
         /// directly correspond to one or more instruction mnemonics.
@@ -804,11 +1218,17 @@ pub const Inst = struct {
         /// Uses `ri` payload with `i` index of extra data of type `Imm64`.
         ri_64,
         /// Immediate (sign-extended) operand.
-        /// Uses `imm` payload.
+        /// Uses `i` payload.
         i_s,
         /// Immediate (unsigned) operand.
-        /// Uses `imm` payload.
+        /// Uses `i` payload.
         i_u,
+        /// Immediate (word), immediate (byte) operands.
+        /// Uses `ii` payload.
+        ii,
+        /// Immediate (byte), register operands.
+        /// Uses `ri` payload.
+        ir,
         /// Relative displacement operand.
         /// Uses `reloc` payload.
         rel,
@@ -1036,6 +1456,11 @@ pub const Inst = struct {
             fixes: Fixes = ._,
             i: u32,
         },
+        ii: struct {
+            fixes: Fixes = ._,
+            i1: u16,
+            i2: u8,
+        },
         r: struct {
             fixes: Fixes = ._,
             r1: Register,
@@ -1244,7 +1669,7 @@ pub const Memory = struct {
         size: bits.Memory.Size,
         index: Register,
         scale: bits.Memory.Scale,
-        _: u15 = undefined,
+        _: u14 = undefined,
     };
 
     pub fn encode(mem: bits.Memory) Memory {