Commit e4c049e410

Jacob Young <jacobly0@users.noreply.github.com>
2025-01-27 19:37:08
x86_64: rewrite comparisons
1 parent fecdc53
Changed files (3)
src
test
behavior
x86_64
src/arch/x86_64/bits.zig
@@ -159,7 +159,6 @@ pub const Condition = enum(u5) {
             .ae => .be,
             .b => .a,
             .be => .ae,
-            .c => .a,
             .g => .l,
             .ge => .le,
             .l => .g,
@@ -168,7 +167,6 @@ pub const Condition = enum(u5) {
             .nae => .nbe,
             .nb => .na,
             .nbe => .nae,
-            .nc => .na,
             .ng => .nl,
             .nge => .nle,
             .nl => .ng,
src/arch/x86_64/CodeGen.zig
@@ -635,6 +635,7 @@ const InstTracking = struct {
         }
         tracking_log.debug("spill {} from {} to {}", .{ inst, self.short, self.long });
         try cg.genCopy(cg.typeOfIndex(inst), self.long, self.short, .{});
+        for (self.short.getRegs()) |reg| if (reg.class() == .x87) try cg.asmRegister(.{ .f_, .free }, reg);
     }
 
     fn reuseFrame(self: *InstTracking) void {
@@ -649,7 +650,21 @@ const InstTracking = struct {
     }
 
     fn trackSpill(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) !void { // Releases what `self.short` holds, then calls `reuseFrame` and logs the new tracking state.
-        try function.freeValue(self.short);
+        switch (self.short) { // Replaces the removed `freeValue` call with explicit per-variant handling.
+            .register => |reg| function.register_manager.freeReg(reg),
+            inline .register_pair,
+            .register_triple,
+            .register_quadruple,
+            => |regs| for (regs) |reg| function.register_manager.freeReg(reg), // Multi-register variants: free every register in the payload.
+            .register_offset, .indirect => |reg_off| function.register_manager.freeReg(reg_off.reg), // Only the `.reg` field of the payload is freed.
+            .register_overflow => |reg_ov| {
+                function.register_manager.freeReg(reg_ov.reg);
+                function.eflags_inst = null; // Also resets `eflags_inst`, matching the `.eflags` prong below.
+            },
+            .register_mask => |reg_mask| function.register_manager.freeReg(reg_mask.reg),
+            .eflags => function.eflags_inst = null, // No register to free; only `eflags_inst` is reset.
+            else => {}, // TODO process stack allocation death
+        }
         self.reuseFrame();
         tracking_log.debug("{} => {} (spilled)", .{ inst, self.* });
     }
@@ -1063,10 +1078,16 @@ pub fn generateLazy(
         .end_di_column = undefined, // no debug info yet
     };
     defer {
+        function.inst_tracking.deinit(gpa);
         function.mir_instructions.deinit(gpa);
         function.mir_extra.deinit(gpa);
         function.mir_table.deinit(gpa);
     }
+    try function.inst_tracking.ensureTotalCapacity(gpa, Temp.Index.max);
+    for (0..Temp.Index.max) |temp_index| {
+        const temp: Temp.Index = @enumFromInt(temp_index);
+        function.inst_tracking.putAssumeCapacityNoClobber(temp.toIndex(), .init(.none));
+    }
 
     function.genLazy(lazy_sym) catch |err| switch (err) {
         error.CodegenFail => return error.CodegenFail,
@@ -2412,7 +2433,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
 
             try cg.airArg(inst);
 
-            cg.resetTemps();
+            try cg.resetTemps();
             cg.checkInvariantsAfterAirInst();
         },
         else => break,
@@ -6348,7 +6369,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .@"1:", .f_p, .st, .dst0t, ._, ._, ._ },
                     } },
                 }, .{
-                    .required_features = .{ .@"64bit", .x87, null, null },
+                    .required_features = .{ .x87, null, null, null },
                     .src_constraints = .{
                         .{ .scalar_float = .{ .of = .xword, .is = .tbyte } },
                         .{ .scalar_float = .{ .of = .xword, .is = .tbyte } },
@@ -6375,58 +6396,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
                         .{ ._, .f_, .xam, ._, ._, ._, ._ },
                         .{ ._, .fn_sw, .st, .tmp1w, ._, ._, ._ },
-                        .{ ._, ._, .@"test", .tmp1b, .si(0b0_1_000_100), ._, ._ },
+                        .{ ._, ._, .@"test", .tmp1b, .si(0b1_000_100), ._, ._ },
                         .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
                         .{ ._, .f_, .xch, .src1t, ._, ._, ._ },
                         .{ ._, .f_, .ucom, .src1t, ._, ._, ._ },
                         .{ ._, .fn_sw, .st, .tmp1w, ._, ._, ._ },
                         .{ ._, .f_, .xch, .src1t, ._, ._, ._ },
-                        .{ ._, ._, .@"test", .tmp1b, .si(0b0_0_000_001), ._, ._ },
+                        .{ ._, ._, .@"test", .tmp1b, .si(0b0_000_001), ._, ._ },
                         .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                         .{ .@"0:", .f_p, .st, .tmp0t, ._, ._, ._ },
                         .{ ._, .f_, .ld, .src1t, ._, ._, ._ },
                         .{ .@"1:", .f_p, .st, .dst0t, ._, ._, ._ },
                     } },
-                }, .{
-                    .required_features = .{ .x87, null, null, null },
-                    .src_constraints = .{
-                        .{ .scalar_float = .{ .of = .xword, .is = .tbyte } },
-                        .{ .scalar_float = .{ .of = .xword, .is = .tbyte } },
-                    },
-                    .patterns = &.{
-                        .{ .src = .{ .to_x87, .mem }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .mem, .to_x87 } },
-                        .{ .src = .{ .to_x87, .to_x87 } },
-                    },
-                    .extra_temps = .{
-                        .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                        .{ .type = .u8, .kind = .{ .reg = .ah } },
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                    },
-                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src1, .rc = .x87 } }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
-                        .{ ._, .f_, .ucom, .tmp0t, ._, ._, ._ },
-                        .{ ._, .fn_sw, .st, .tmp1w, ._, ._, ._ },
-                        .{ ._, ._, .sahf, ._, ._, ._, ._ },
-                        .{ ._, ._p, .j, .@"0f", ._, ._, ._ },
-                        .{ ._, .f_, .xch, .src1t, ._, ._, ._ },
-                        .{ ._, .f_, .ucom, .src1t, ._, ._, ._ },
-                        .{ ._, .fn_sw, .st, .tmp1w, ._, ._, ._ },
-                        .{ ._, .f_, .xch, .src1t, ._, ._, ._ },
-                        .{ ._, ._, .sahf, ._, ._, ._, ._ },
-                        .{ ._, ._b, .j, .@"1f", ._, ._, ._ },
-                        .{ .@"0:", .f_p, .st, .tmp0t, ._, ._, ._ },
-                        .{ ._, .f_, .ld, .src1t, ._, ._, ._ },
-                        .{ .@"1:", .f_p, .st, .dst0t, ._, ._, ._ },
-                    } },
                 }, .{
                     .required_features = .{ .x87, .cmov, null, null },
                     .src_constraints = .{
@@ -6508,7 +6489,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .required_features = .{ .@"64bit", .x87, null, null },
+                    .required_features = .{ .x87, null, null, null },
                     .src_constraints = .{
                         .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
                         .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
@@ -6535,13 +6516,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ },
                         .{ ._, .f_, .xam, ._, ._, ._, ._ },
                         .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ },
-                        .{ ._, ._, .@"test", .tmp3b, .si(0b0_1_000_100), ._, ._ },
+                        .{ ._, ._, .@"test", .tmp3b, .si(0b1_000_100), ._, ._ },
                         .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
                         .{ ._, .f_, .xch, .tmp2t, ._, ._, ._ },
                         .{ ._, .f_, .ucom, .tmp2t, ._, ._, ._ },
                         .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ },
                         .{ ._, .f_, .xch, .tmp2t, ._, ._, ._ },
-                        .{ ._, ._, .@"test", .tmp3b, .si(0b0_0_000_001), ._, ._ },
+                        .{ ._, ._, .@"test", .tmp3b, .si(0b0_000_001), ._, ._ },
                         .{ ._, ._nz, .j, .@"2f", ._, ._, ._ },
                         .{ .@"1:", .f_p, .st, .tmp1t, ._, ._, ._ },
                         .{ ._, .f_, .ld, .tmp2t, ._, ._, ._ },
@@ -6550,49 +6531,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
-                }, .{
-                    .required_features = .{ .x87, null, null, null },
-                    .src_constraints = .{
-                        .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
-                        .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
-                    },
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .to_mem } },
-                    },
-                    .extra_temps = .{
-                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                        .{ .type = .f80, .kind = .{ .reg = .st6 } },
-                        .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                        .{ .type = .u8, .kind = .{ .reg = .ah } },
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                    },
-                    .dst_temps = .{.mem},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .f_, .ld, .memia(.src1t, .tmp0, .add_size), ._, ._, ._ },
-                        .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ },
-                        .{ ._, .f_, .ucom, .tmp1t, ._, ._, ._ },
-                        .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ },
-                        .{ ._, ._, .sahf, ._, ._, ._, ._ },
-                        .{ ._, ._p, .j, .@"1f", ._, ._, ._ },
-                        .{ ._, .f_, .xch, .tmp2t, ._, ._, ._ },
-                        .{ ._, .f_, .ucom, .tmp2t, ._, ._, ._ },
-                        .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ },
-                        .{ ._, .f_, .xch, .tmp2t, ._, ._, ._ },
-                        .{ ._, ._, .sahf, ._, ._, ._, ._ },
-                        .{ ._, ._b, .j, .@"2f", ._, ._, ._ },
-                        .{ .@"1:", .f_p, .st, .tmp1t, ._, ._, ._ },
-                        .{ ._, .f_, .ld, .tmp2t, ._, ._, ._ },
-                        .{ .@"2:", .f_p, .st, .memia(.dst0t, .tmp0, .add_size), ._, ._, ._ },
-                        .{ ._, .f_p, .st, .tmp2t, ._, ._, ._ },
-                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
-                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
-                    } },
                 }, .{
                     .required_features = .{ .sse, null, null, null },
                     .src_constraints = .{
@@ -10271,7 +10209,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .@"1:", .f_p, .st, .dst0t, ._, ._, ._ },
                     } },
                 }, .{
-                    .required_features = .{ .@"64bit", .x87, null, null },
+                    .required_features = .{ .x87, null, null, null },
                     .src_constraints = .{
                         .{ .scalar_float = .{ .of = .xword, .is = .tbyte } },
                         .{ .scalar_float = .{ .of = .xword, .is = .tbyte } },
@@ -10298,54 +10236,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
                         .{ ._, .f_, .xam, ._, ._, ._, ._ },
                         .{ ._, .fn_sw, .st, .tmp1w, ._, ._, ._ },
-                        .{ ._, ._, .@"test", .tmp1b, .si(0b0_1_000_100), ._, ._ },
+                        .{ ._, ._, .@"test", .tmp1b, .si(0b1_000_100), ._, ._ },
                         .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
                         .{ ._, .f_, .ucom, .src1t, ._, ._, ._ },
                         .{ ._, .fn_sw, .st, .tmp1w, ._, ._, ._ },
-                        .{ ._, ._, .@"test", .tmp1b, .si(0b0_0_000_001), ._, ._ },
+                        .{ ._, ._, .@"test", .tmp1b, .si(0b0_000_001), ._, ._ },
                         .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                         .{ .@"0:", .f_p, .st, .tmp0t, ._, ._, ._ },
                         .{ ._, .f_, .ld, .src1t, ._, ._, ._ },
                         .{ .@"1:", .f_p, .st, .dst0t, ._, ._, ._ },
                     } },
-                }, .{
-                    .required_features = .{ .x87, null, null, null },
-                    .src_constraints = .{
-                        .{ .scalar_float = .{ .of = .xword, .is = .tbyte } },
-                        .{ .scalar_float = .{ .of = .xword, .is = .tbyte } },
-                    },
-                    .patterns = &.{
-                        .{ .src = .{ .to_x87, .mem }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .mem, .to_x87 } },
-                        .{ .src = .{ .to_x87, .to_x87 } },
-                    },
-                    .extra_temps = .{
-                        .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                        .{ .type = .u8, .kind = .{ .reg = .ah } },
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                    },
-                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src1, .rc = .x87 } }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
-                        .{ ._, .f_, .ucom, .tmp0t, ._, ._, ._ },
-                        .{ ._, .fn_sw, .st, .tmp1w, ._, ._, ._ },
-                        .{ ._, ._, .sahf, ._, ._, ._, ._ },
-                        .{ ._, ._p, .j, .@"0f", ._, ._, ._ },
-                        .{ ._, .f_, .ucom, .src1t, ._, ._, ._ },
-                        .{ ._, .fn_sw, .st, .tmp1w, ._, ._, ._ },
-                        .{ ._, ._, .sahf, ._, ._, ._, ._ },
-                        .{ ._, ._b, .j, .@"1f", ._, ._, ._ },
-                        .{ .@"0:", .f_p, .st, .tmp0t, ._, ._, ._ },
-                        .{ ._, .f_, .ld, .src1t, ._, ._, ._ },
-                        .{ .@"1:", .f_p, .st, .dst0t, ._, ._, ._ },
-                    } },
                 }, .{
                     .required_features = .{ .x87, .cmov, null, null },
                     .src_constraints = .{
@@ -10423,7 +10323,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .required_features = .{ .@"64bit", .x87, null, null },
+                    .required_features = .{ .x87, null, null, null },
                     .src_constraints = .{
                         .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
                         .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
@@ -10450,11 +10350,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ },
                         .{ ._, .f_, .xam, ._, ._, ._, ._ },
                         .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ },
-                        .{ ._, ._, .@"test", .tmp3b, .si(0b0_1_000_100), ._, ._ },
+                        .{ ._, ._, .@"test", .tmp3b, .si(0b1_000_100), ._, ._ },
                         .{ ._, ._z, .j, .@"1f", ._, ._, ._ },
                         .{ ._, .f_, .ucom, .tmp2t, ._, ._, ._ },
                         .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ },
-                        .{ ._, ._, .@"test", .tmp3b, .si(0b0_0_000_001), ._, ._ },
+                        .{ ._, ._, .@"test", .tmp3b, .si(0b0_000_001), ._, ._ },
                         .{ ._, ._nz, .j, .@"2f", ._, ._, ._ },
                         .{ .@"1:", .f_p, .st, .tmp1t, ._, ._, ._ },
                         .{ ._, .f_, .ld, .tmp2t, ._, ._, ._ },
@@ -10463,47 +10363,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
-                }, .{
-                    .required_features = .{ .x87, null, null, null },
-                    .src_constraints = .{
-                        .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
-                        .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
-                    },
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .to_mem } },
-                    },
-                    .extra_temps = .{
-                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                        .{ .type = .f80, .kind = .{ .reg = .st6 } },
-                        .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                        .{ .type = .u8, .kind = .{ .reg = .ah } },
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                    },
-                    .dst_temps = .{.mem},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .f_, .ld, .memia(.src1t, .tmp0, .add_size), ._, ._, ._ },
-                        .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ },
-                        .{ ._, .f_, .ucom, .tmp1t, ._, ._, ._ },
-                        .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ },
-                        .{ ._, ._, .sahf, ._, ._, ._, ._ },
-                        .{ ._, ._p, .j, .@"1f", ._, ._, ._ },
-                        .{ ._, .f_, .ucom, .tmp2t, ._, ._, ._ },
-                        .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ },
-                        .{ ._, ._, .sahf, ._, ._, ._, ._ },
-                        .{ ._, ._b, .j, .@"2f", ._, ._, ._ },
-                        .{ .@"1:", .f_p, .st, .tmp1t, ._, ._, ._ },
-                        .{ ._, .f_, .ld, .tmp2t, ._, ._, ._ },
-                        .{ .@"2:", .f_p, .st, .memia(.dst0t, .tmp0, .add_size), ._, ._, ._ },
-                        .{ ._, .f_p, .st, .tmp2t, ._, ._, ._ },
-                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
-                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
-                    } },
                 }, .{
                     .required_features = .{ .sse, null, null, null },
                     .src_constraints = .{
@@ -12186,7 +12045,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
             .loop => if (use_old) try cg.airLoop(inst) else {
                 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
                 const extra = cg.air.extraData(Air.Block, ty_pl.payload);
-                cg.scope_generation += 1;
                 try cg.loops.putNoClobber(cg.gpa, inst, .{
                     .state = try cg.saveState(),
                     .target = @intCast(cg.mir_instructions.len),
@@ -15808,16 +15666,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
                                     .{ .@"1:", ._, .add, .tmp0p, .si(32), ._, ._ },
                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                    .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
-                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                                 },
                                 .ne => &.{
                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
@@ -15833,16 +15690,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
                                     .{ .@"1:", ._, .add, .tmp0p, .si(32), ._, ._ },
                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                    .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
-                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                                 },
                             } },
                         }, .{
@@ -15971,16 +15827,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
                                     .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                    .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
-                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                                 },
                                 .ne => &.{
                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
@@ -15996,16 +15851,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
                                     .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                    .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
-                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                                 },
                             } },
                         }, .{
@@ -16042,16 +15896,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
                                     .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                    .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
-                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                                 },
                                 .ne => &.{
                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
@@ -16067,16 +15920,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
                                     .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                    .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
-                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                                 },
                             } },
                         }, .{
@@ -16205,16 +16057,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
                                     .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                    .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
-                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                                 },
                                 .ne => &.{
                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
@@ -16230,16 +16081,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
                                     .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                    .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
-                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                                 },
                             } },
                         }, .{
@@ -16276,16 +16126,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
                                     .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                    .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
-                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                                 },
                                 .ne => &.{
                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
@@ -16301,16 +16150,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                                     .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
                                     .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                    .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
-                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                                 },
                             } },
                         }, .{
@@ -16394,16 +16242,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                                     .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp4d, .ui(3), ._, ._ },
                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ },
                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
                                     .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ },
                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                    .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
-                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                                 },
                                 .ne => &.{
                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
@@ -16421,16 +16268,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                                     .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp4d, .ui(3), ._, ._ },
                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ },
                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
                                     .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ },
                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                    .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
-                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                                 },
                             } },
                         }, .{
@@ -16470,16 +16316,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                                     .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp4d, .ui(3), ._, ._ },
                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ },
                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
                                     .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ },
                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                    .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
-                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                                 },
                                 .ne => &.{
                                     .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
@@ -16498,16 +16343,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                                     .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp4d, .ui(3), ._, ._ },
                                     .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ },
                                     .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ },
                                     .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ },
                                     .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                     .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                     .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                    .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
-                                    .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ },
-                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
+                                    .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                    .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                                 },
                             } },
                         }, .{
@@ -17059,16 +16903,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._, .@"test", .tmp1d, .sia(-1, .none, .add_ptr_bit_size), ._, ._ },
                                 .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                                 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
+                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
                                 .{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_ptr_size), .tmp2p, ._, ._ },
                                 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
                                 .{ .@"1:", ._, .add, .tmp0p, .si(1), ._, ._ },
                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp1d, .sia(-1, .none, .add_ptr_bit_size), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ },
-                                .{ ._, ._, .mov, .memi(.dst0p, .tmp3), .tmp2p, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp1d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0p, .@"8", .tmp1), .tmp2p, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .f16c, null, null, null },
@@ -17809,9 +17652,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp5d, .si(9), ._, ._ },
-                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp5), .tmp1q, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp1q, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .@"64bit", .avx, null, null },
@@ -17864,9 +17706,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp5d, .si(9), ._, ._ },
-                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp5), .tmp1q, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp1q, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .@"64bit", .sse2, .slow_incdec, null },
@@ -17920,9 +17761,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp5d, .si(9), ._, ._ },
-                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp5), .tmp1q, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp1q, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .@"64bit", .sse2, null, null },
@@ -17976,9 +17816,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp5d, .si(9), ._, ._ },
-                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp5), .tmp1q, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp1q, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .@"64bit", .sse, .slow_incdec, null },
@@ -18034,9 +17873,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp5d, .si(9), ._, ._ },
-                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp5), .tmp1q, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp1q, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .@"64bit", .sse, null, null },
@@ -18092,9 +17930,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp5d, .si(9), ._, ._ },
-                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp5), .tmp1q, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp1q, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .avx, .slow_incdec, null, null },
@@ -18447,9 +18284,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp4d, .ui(3), ._, ._ },
-                                .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .sse2, null, null, null },
@@ -18497,9 +18333,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp4d, .ui(3), ._, ._ },
-                                .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .x87, .cmov, .slow_incdec, null },
@@ -18557,9 +18392,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
-                                .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .x87, .cmov, null, null },
@@ -18617,9 +18451,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
-                                .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .x87, null, null, null },
@@ -18652,8 +18485,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, .f_, .ld, .memia(.src0q, .tmp0, .add_size), ._, ._, ._ },
                                 .{ ._, .f_pp, .ucom, ._, ._, ._, ._ },
                                 .{ ._, .fn_sw, .st, .tmp6w, ._, ._, ._ },
-                                .{ ._, ._, .xor, .tmp6b, .si(0b0_1_000_000), ._, ._ },
-                                .{ ._, ._, .@"test", .tmp6b, .si(0b0_1_000_100), ._, ._ },
+                                .{ ._, ._, .xor, .tmp6b, .si(0b1_000_000), ._, ._ },
+                                .{ ._, ._, .@"test", .tmp6b, .si(0b1_000_100), ._, ._ },
                                 .{ ._, .fromCond(cc), .set, .tmp3b, ._, ._, ._ },
                                 .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
                                 .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
@@ -18668,72 +18501,70 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .x87, .cmov, .slow_incdec, null },
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .u32, .kind = .{ .reg = .rcx } },
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .f80, .kind = .{ .reg = .st6 } },
+                                .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+                                .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ },
+                                .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
+                                .{ ._, .f_, .ld, .memia(.src1t, .tmp0, .add_size), ._, ._, ._ },
+                                .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ },
+                                .{ ._, .f_p, .ucomi, .tmp5t, .tmp6t, ._, ._ },
+                                .{ ._, .f_p, .st, .tmp6t, ._, ._, ._ },
+                                .{ ._, .fromCond(cc), .set, .tmp3b, ._, ._, ._ },
+                                .{ ._, switch (cc) {
+                                    else => unreachable,
+                                    .e => ._np,
+                                    .ne => ._p,
+                                }, .set, .tmp4b, ._, ._, ._ },
+                                .{ ._, ._, switch (cc) {
+                                    else => unreachable,
+                                    .e => .@"and",
+                                    .ne => .@"or",
+                                }, .tmp3b, .tmp4b, ._, ._ },
+                                .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
+                                .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
+                                .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
                                 .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
                                 .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
-                                .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+                                .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+                                .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                             } },
                         }, .{
-                            .required_features = .{ .x87, .cmov, .slow_incdec, null },
-                            .src_constraints = .{
-                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
-                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
-                            },
-                            .patterns = &.{
-                                .{ .src = .{ .to_mem, .to_mem } },
-                            },
-                            .extra_temps = .{
-                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                                .{ .type = .u32, .kind = .{ .reg = .rcx } },
-                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
-                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
-                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
-                                .{ .type = .f80, .kind = .{ .reg = .st6 } },
-                                .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                                .unused,
-                                .unused,
-                            },
-                            .dst_temps = .{.mem},
-                            .clobbers = .{ .eflags = true },
-                            .each = .{ .once = &.{
-                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
-                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
-                                .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ },
-                                .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
-                                .{ ._, .f_, .ld, .memia(.src1t, .tmp0, .add_size), ._, ._, ._ },
-                                .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ },
-                                .{ ._, .f_p, .ucomi, .tmp5t, .tmp6t, ._, ._ },
-                                .{ ._, .f_p, .st, .tmp6t, ._, ._, ._ },
-                                .{ ._, .fromCond(cc), .set, .tmp3b, ._, ._, ._ },
-                                .{ ._, switch (cc) {
-                                    else => unreachable,
-                                    .e => ._np,
-                                    .ne => ._p,
-                                }, .set, .tmp4b, ._, ._, ._ },
-                                .{ ._, ._, switch (cc) {
-                                    else => unreachable,
-                                    .e => .@"and",
-                                    .ne => .@"or",
-                                }, .tmp3b, .tmp4b, ._, ._ },
-                                .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
-                                .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
-                                .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
-                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
-                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
-                                .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ },
-                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
-                                .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
-                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
-                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
-                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
-                                .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
-                            } },
-                        }, .{
-                            .required_features = .{ .x87, .cmov, null, null },
+                            .required_features = .{ .x87, .cmov, null, null },
                             .src_constraints = .{
                                 .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
                                 .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
@@ -18788,9 +18619,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
-                                .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .x87, null, null, null },
@@ -18823,8 +18653,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ },
                                 .{ ._, .f_pp, .ucom, ._, ._, ._, ._ },
                                 .{ ._, .fn_sw, .st, .tmp6w, ._, ._, ._ },
-                                .{ ._, ._, .xor, .tmp6b, .si(0b0_1_000_000), ._, ._ },
-                                .{ ._, ._, .@"test", .tmp6b, .si(0b0_1_000_100), ._, ._ },
+                                .{ ._, ._, .xor, .tmp6b, .si(0b1_000_000), ._, ._ },
+                                .{ ._, ._, .@"test", .tmp6b, .si(0b1_000_100), ._, ._ },
                                 .{ ._, .fromCond(cc), .set, .tmp3b, ._, ._, ._ },
                                 .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ },
                                 .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ },
@@ -18839,9 +18669,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
-                                .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp1d, .ui(3), ._, ._ },
+                                .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .avx, .slow_incdec, null, null },
@@ -19164,9 +18993,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp6d, .si(9), ._, ._ },
-                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp6), .tmp2q, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp1d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp1), .tmp2q, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .@"64bit", .avx, null, null },
@@ -19219,9 +19047,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp6d, .si(9), ._, ._ },
-                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp6), .tmp2q, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp1d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp1), .tmp2q, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .@"64bit", .sse2, .slow_incdec, null },
@@ -19274,9 +19101,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp6d, .si(9), ._, ._ },
-                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp6), .tmp2q, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp1d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp1), .tmp2q, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .@"64bit", .sse2, null, null },
@@ -19329,9 +19155,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp6d, .si(9), ._, ._ },
-                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp6), .tmp2q, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp1d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp1), .tmp2q, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .@"64bit", .sse, .slow_incdec, null },
@@ -19384,9 +19209,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp6d, .si(9), ._, ._ },
-                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp6), .tmp2q, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp1d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp1), .tmp2q, ._, ._ },
                             } },
                         }, .{
                             .required_features = .{ .@"64bit", .sse, null, null },
@@ -19439,9 +19263,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ },
                                 .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
-                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
-                                .{ ._, ._r, .sh, .tmp6d, .si(9), ._, ._ },
-                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp6), .tmp2q, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp1d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp1), .tmp2q, ._, ._ },
                             } },
                         } },
                     }) catch |err| switch (err) {
@@ -19651,11 +19474,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
                         .{ ._, ._, .mov, .tmp1q, .memad(.src0q, .add_size, -8), ._, ._ },
-                        .{ ._, ._r, .sa, .tmp1q, .si(63), ._, ._ },
+                        .{ ._, ._r, .sa, .tmp1q, .ui(63), ._, ._ },
                         .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
                         .{ ._, ._, .xor, .tmp3q, .tmp1q, ._, ._ },
-                        .{ ._, ._r, .sh, .tmp2b, .si(1), ._, ._ },
+                        .{ ._, ._r, .sh, .tmp2b, .ui(1), ._, ._ },
                         .{ ._, ._, .sbb, .tmp3q, .tmp1q, ._, ._ },
                         .{ ._, ._c, .set, .tmp2b, ._, ._, ._ },
                         .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp3q, ._, ._ },
@@ -20413,11 +20236,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .sa(.none, .add_src0_elem_size), ._, ._ },
                         .{ ._, ._, .mov, .tmp2q, .memiad(.src0q, .tmp0, .add_src0_elem_size, -8), ._, ._ },
-                        .{ ._, ._r, .sa, .tmp2q, .si(63), ._, ._ },
+                        .{ ._, ._r, .sa, .tmp2q, .ui(63), ._, ._ },
                         .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
                         .{ .@"1:", ._, .mov, .tmp4q, .memi(.src0q, .tmp0), ._, ._ },
                         .{ ._, ._, .xor, .tmp4q, .tmp2q, ._, ._ },
-                        .{ ._, ._r, .sh, .tmp3b, .si(1), ._, ._ },
+                        .{ ._, ._r, .sh, .tmp3b, .ui(1), ._, ._ },
                         .{ ._, ._, .sbb, .tmp4q, .tmp2q, ._, ._ },
                         .{ ._, ._c, .set, .tmp3b, ._, ._, ._ },
                         .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp4q, ._, ._ },
@@ -20932,185 +20755,405 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
             .cmp_gte_optimized,
             .cmp_gt,
             .cmp_gt_optimized,
-            => |air_tag| if (use_old) try cg.airCmp(inst, switch (air_tag) {
-                else => unreachable,
-                .cmp_lt, .cmp_lt_optimized => .lt,
-                .cmp_lte, .cmp_lte_optimized => .lte,
-                .cmp_gte, .cmp_gte_optimized => .gte,
-                .cmp_gt, .cmp_gt_optimized => .gt,
-            }) else fallback: {
+            => |air_tag| if (use_old) try cg.airCmp(inst, air_tag.toCmpOp().?) else {
                 const bin_op = air_datas[@intFromEnum(inst)].bin_op;
-                const scalar_ty = cg.typeOf(bin_op.lhs).scalarType(zcu);
-                if (scalar_ty.isRuntimeFloat()) break :fallback try cg.airCmp(inst, switch (air_tag) {
-                    else => unreachable,
-                    .cmp_lt, .cmp_lt_optimized => .lt,
-                    .cmp_lte, .cmp_lte_optimized => .lte,
-                    .cmp_gte, .cmp_gte_optimized => .gte,
-                    .cmp_gt, .cmp_gt_optimized => .gt,
-                });
-                const signedness = if (scalar_ty.isAbiInt(zcu))
-                    scalar_ty.intInfo(zcu).signedness
-                else
-                    .unsigned;
+                const cmp_op = air_tag.toCmpOp().?;
                 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
                 var res: [1]Temp = undefined;
-                cg.select(&res, &.{.bool}, &ops, switch (@as(Condition, switch (signedness) {
-                    .signed => switch (air_tag) {
+                (if (cg.floatBits(cg.typeOf(bin_op.lhs))) |_| err: {
+                    switch (cmp_op) {
                         else => unreachable,
-                        .cmp_lt, .cmp_lt_optimized => .l,
-                        .cmp_lte, .cmp_lte_optimized => .le,
-                        .cmp_gte, .cmp_gte_optimized => .ge,
-                        .cmp_gt, .cmp_gt_optimized => .g,
-                    },
-                    .unsigned => switch (air_tag) {
+                        .lt, .lte => {},
+                        .gt, .gte => std.mem.swap(Temp, &ops[0], &ops[1]),
+                    }
+                    break :err cg.select(&res, &.{.bool}, &ops, switch (switch (cmp_op) {
                         else => unreachable,
-                        .cmp_lt, .cmp_lt_optimized => .b,
-                        .cmp_lte, .cmp_lte_optimized => .be,
-                        .cmp_gte, .cmp_gte_optimized => .ae,
-                        .cmp_gt, .cmp_gt_optimized => .a,
-                    },
-                })) {
-                    else => unreachable,
-                    inline .l, .le, .ge, .g, .b, .be, .ae, .a => |cc| comptime &.{ .{
-                        .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
-                        .patterns = &.{
-                            .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .imm8, .to_gpr }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
-                        },
-                        .dst_temps = .{.{ .cc = cc.commute() }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
-                        } },
-                    }, .{
-                        .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
-                        .patterns = &.{
-                            .{ .src = .{ .mem, .imm8 } },
-                            .{ .src = .{ .to_gpr, .imm8 } },
-                            .{ .src = .{ .to_gpr, .mem } },
-                            .{ .src = .{ .to_gpr, .to_gpr } },
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
-                        } },
-                    }, .{
-                        .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
-                        .patterns = &.{
-                            .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .imm16, .to_gpr }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
-                        },
-                        .dst_temps = .{.{ .cc = cc.commute() }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
-                        } },
-                    }, .{
-                        .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
-                        .patterns = &.{
-                            .{ .src = .{ .mem, .imm16 } },
-                            .{ .src = .{ .to_gpr, .imm16 } },
-                            .{ .src = .{ .to_gpr, .mem } },
-                            .{ .src = .{ .to_gpr, .to_gpr } },
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
-                        } },
-                    }, .{
-                        .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
-                        .patterns = &.{
-                            .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .imm32, .to_gpr }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
-                        },
-                        .dst_temps = .{.{ .cc = cc.commute() }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
-                        } },
-                    }, .{
-                        .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
-                        .patterns = &.{
-                            .{ .src = .{ .mem, .imm32 } },
-                            .{ .src = .{ .to_gpr, .imm32 } },
-                            .{ .src = .{ .to_gpr, .mem } },
-                            .{ .src = .{ .to_gpr, .to_gpr } },
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .@"64bit", null, null, null },
-                        .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
-                        .patterns = &.{
-                            .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .simm32, .to_gpr }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
-                        },
-                        .dst_temps = .{.{ .cc = cc.commute() }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .@"64bit", null, null, null },
-                        .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
-                        .patterns = &.{
-                            .{ .src = .{ .mem, .simm32 } },
-                            .{ .src = .{ .to_gpr, .simm32 } },
-                            .{ .src = .{ .to_gpr, .mem } },
-                            .{ .src = .{ .to_gpr, .to_gpr } },
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
-                        } },
-                    }, .{
-                        .src_constraints = .{ .any_int, .any_int },
-                        .patterns = &.{
-                            .{ .src = .{ .to_mem, .to_mem }, .commute = switch (cc) {
-                                else => unreachable,
-                                .l, .ge, .b, .ae => .{ 0, 0 },
-                                .le, .g, .be, .a => .{ 0, 1 },
+                        .lt, .gt => true,
+                        .lte, .gte => false,
+                    }) {
+                        inline false, true => |strict| comptime &.{ .{
+                            .required_features = .{ .f16c, null, null, null },
+                            .src_constraints = .{ .{ .float = .word }, .{ .float = .word } },
+                            .patterns = &.{
+                                .{ .src = .{ .to_sse, .to_sse }, .commute = .{ 0, 1 } },
+                            },
+                            .extra_temps = .{
+                                .{ .kind = .{ .mut_rc = .{ .ref = .src0, .rc = .sse } } },
+                                .{ .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } },
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .cc = switch (strict) {
+                                true => .a,
+                                false => .ae,
+                            } }},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, .v_ps, .cvtph2, .tmp0x, .src0q, ._, ._ },
+                                .{ ._, .v_ps, .cvtph2, .tmp1x, .src1q, ._, ._ },
+                                .{ ._, .v_ss, .ucomi, .tmp0x, .tmp1d, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .sse, null, null, null },
+                            .src_constraints = .{ .{ .float = .word }, .{ .float = .word } },
+                            .patterns = &.{
+                                .{ .src = .{ .{ .to_reg = .xmm0 }, .{ .to_reg = .xmm1 } } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__cmphf2" } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } },
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .cc = switch (strict) {
+                                true => .l,
+                                false => .le,
+                            } }},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+                                .{ ._, ._, .@"test", .tmp1d, .tmp1d, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .avx, null, null, null },
+                            .src_constraints = .{ .{ .float = .dword }, .{ .float = .dword } },
+                            .patterns = &.{
+                                .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_sse, .to_sse }, .commute = .{ 0, 1 } },
+                            },
+                            .dst_temps = .{.{ .cc = switch (strict) {
+                                true => .a,
+                                false => .ae,
+                            } }},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, .v_ss, .ucomi, .src0x, .src1d, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .sse, null, null, null },
+                            .src_constraints = .{ .{ .float = .dword }, .{ .float = .dword } },
+                            .patterns = &.{
+                                .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_sse, .to_sse }, .commute = .{ 0, 1 } },
+                            },
+                            .dst_temps = .{.{ .cc = switch (strict) {
+                                true => .a,
+                                false => .ae,
+                            } }},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._ss, .ucomi, .src0x, .src1d, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .avx, null, null, null },
+                            .src_constraints = .{ .{ .float = .qword }, .{ .float = .qword } },
+                            .patterns = &.{
+                                .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_sse, .to_sse }, .commute = .{ 0, 1 } },
+                            },
+                            .dst_temps = .{.{ .cc = switch (strict) {
+                                true => .a,
+                                false => .ae,
+                            } }},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, .v_sd, .ucomi, .src0x, .src1q, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .sse2, null, null, null },
+                            .src_constraints = .{ .{ .float = .qword }, .{ .float = .qword } },
+                            .patterns = &.{
+                                .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_sse, .to_sse }, .commute = .{ 0, 1 } },
+                            },
+                            .dst_temps = .{.{ .cc = switch (strict) {
+                                true => .a,
+                                false => .ae,
+                            } }},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._sd, .ucomi, .src0x, .src1q, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .x87, .cmov, null, null },
+                            .src_constraints = .{ .{ .float = .qword }, .{ .float = .qword } },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem }, .commute = .{ 0, 1 } },
+                            },
+                            .extra_temps = .{
+                                .{ .type = .f80, .kind = .{ .reg = .st6 } },
+                                .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .cc = switch (strict) {
+                                true => .a,
+                                false => .ae,
+                            } }},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, .f_, .ld, .src1q, ._, ._, ._ },
+                                .{ ._, .f_, .ld, .src0q, ._, ._, ._ },
+                                .{ ._, .f_p, .ucomi, .tmp0t, .tmp1t, ._, ._ },
+                                .{ ._, .f_p, .st, .tmp1t, ._, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .sahf, .x87, null, null },
+                            .src_constraints = .{ .{ .float = .qword }, .{ .float = .qword } },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem }, .commute = .{ 0, 1 } },
+                            },
+                            .extra_temps = .{
+                                .{ .type = .f80, .kind = .{ .reg = .st6 } },
+                                .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                .{ .type = .u8, .kind = .{ .reg = .ah } },
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .cc = switch (strict) {
+                                true => .a,
+                                false => .ae,
+                            } }},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, .f_, .ld, .src1q, ._, ._, ._ },
+                                .{ ._, .f_, .ld, .src0q, ._, ._, ._ },
+                                .{ ._, .f_pp, .ucom, ._, ._, ._, ._ },
+                                .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
+                                .{ ._, ._, .sahf, ._, ._, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .x87, null, null, null },
+                            .src_constraints = .{ .{ .float = .qword }, .{ .float = .qword } },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem }, .commute = .{ 0, 1 } },
+                            },
+                            .extra_temps = .{
+                                .{ .type = .f80, .kind = .{ .reg = .st6 } },
+                                .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                .{ .type = .u8, .kind = .{ .reg = .ah } },
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .cc = switch (strict) {
+                                true => .z,
+                                false => .nc,
+                            } }},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, .f_, .ld, .src1q, ._, ._, ._ },
+                                .{ ._, .f_, .ld, .src0q, ._, ._, ._ },
+                                .{ ._, .f_pp, .ucom, ._, ._, ._, ._ },
+                                .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
+                                switch (strict) {
+                                    true => .{ ._, ._, .@"test", .tmp2b, .si(0b1_000_001), ._, ._ },
+                                    false => .{ ._, ._r, .sh, .tmp2b, .ui(1), ._, ._ },
+                                },
+                            } },
+                        }, .{
+                            .required_features = .{ .x87, .cmov, null, null },
+                            .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
+                            .patterns = &.{
+                                .{ .src = .{ .to_x87, .mem }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_x87, .to_x87 }, .commute = .{ 0, 1 } },
+                            },
+                            .extra_temps = .{
+                                .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .cc = switch (strict) {
+                                true => .a,
+                                false => .ae,
+                            } }},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
+                                .{ ._, .f_p, .ucomi, .tmp0t, .src1t, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .sahf, .x87, null, null },
+                            .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
+                            .patterns = &.{
+                                .{ .src = .{ .mem, .mem }, .commute = .{ 0, 1 } },
+                            },
+                            .extra_temps = .{
+                                .{ .type = .f80, .kind = .{ .reg = .st6 } },
+                                .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                .{ .type = .u8, .kind = .{ .reg = .ah } },
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .cc = switch (strict) {
+                                true => .a,
+                                false => .ae,
+                            } }},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, .f_, .ld, .src1t, ._, ._, ._ },
+                                .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
+                                .{ ._, .f_pp, .ucom, ._, ._, ._, ._ },
+                                .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
+                                .{ ._, ._, .sahf, ._, ._, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .sahf, .x87, null, null },
+                            .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
+                            .patterns = &.{
+                                .{ .src = .{ .to_x87, .mem }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_x87, .to_x87 }, .commute = .{ 0, 1 } },
+                            },
+                            .extra_temps = .{
+                                .{ .type = .f80, .kind = .{ .reg = .st6 } },
+                                .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                .{ .type = .u8, .kind = .{ .reg = .ah } },
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .cc = switch (strict) {
+                                true => .a,
+                                false => .ae,
+                            } }},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
+                                .{ ._, .f_p, .ucom, .src1t, ._, ._, ._ },
+                                .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
+                                .{ ._, ._, .sahf, ._, ._, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .x87, null, null, null },
+                            .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
+                            .patterns = &.{
+                                .{ .src = .{ .mem, .mem }, .commute = .{ 0, 1 } },
+                            },
+                            .extra_temps = .{
+                                .{ .type = .f80, .kind = .{ .reg = .st6 } },
+                                .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                .{ .type = .u8, .kind = .{ .reg = .ah } },
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .cc = switch (strict) {
+                                true => .z,
+                                false => .nc,
+                            } }},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, .f_, .ld, .src1t, ._, ._, ._ },
+                                .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
+                                .{ ._, .f_pp, .ucom, ._, ._, ._, ._ },
+                                .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
+                                switch (strict) {
+                                    true => .{ ._, ._, .@"test", .tmp2b, .si(0b1_000_001), ._, ._ },
+                                    false => .{ ._, ._r, .sh, .tmp2b, .ui(1), ._, ._ },
+                                },
+                            } },
+                        }, .{
+                            .required_features = .{ .x87, null, null, null },
+                            .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
+                            .patterns = &.{
+                                .{ .src = .{ .to_x87, .mem }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_x87, .to_x87 }, .commute = .{ 0, 1 } },
+                            },
+                            .extra_temps = .{
+                                .{ .type = .f80, .kind = .{ .reg = .st6 } },
+                                .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                .{ .type = .u8, .kind = .{ .reg = .ah } },
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .cc = switch (strict) {
+                                true => .z,
+                                false => .nc,
+                            } }},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
+                                .{ ._, .f_p, .ucom, .src1t, ._, ._, ._ },
+                                .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
+                                switch (strict) {
+                                    true => .{ ._, ._, .@"test", .tmp2b, .si(0b1_000_001), ._, ._ },
+                                    false => .{ ._, ._r, .sh, .tmp2b, .ui(1), ._, ._ },
+                                },
+                            } },
+                        }, .{
+                            .required_features = .{ .sse, null, null, null },
+                            .src_constraints = .{ .{ .float = .xword }, .{ .float = .xword } },
+                            .patterns = &.{
+                                .{ .src = .{ .{ .to_reg = .xmm0 }, .{ .to_reg = .xmm1 } } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__cmptf2" } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } },
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .cc = switch (strict) {
+                                true => .l,
+                                false => .le,
+                            } }},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+                                .{ ._, ._, .@"test", .tmp1d, .tmp1d, ._, ._ },
                             } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                            .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .rc = .general_purpose }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                            .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ },
-                            .{ .@"0:", ._r, .sh, .tmp1b, .si(1), ._, ._ },
-                            .{ ._, ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ },
-                            .{ ._, ._, .sbb, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ },
-                            .{ ._, ._c, .set, .tmp1b, ._, ._, ._ },
-                            .{ ._, .fromCond(switch (cc) {
-                                else => unreachable,
-                                .l, .ge, .b, .ae => cc,
-                                .le, .g, .be, .a => cc.commute(),
-                            }), .set, .dst0b, ._, ._, ._ },
-                            .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ },
-                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                         } },
-                    } },
+                    });
+                } else err: {
+                    res[0] = ops[0].cmpInts(cmp_op, &ops[1], cg) catch |err| break :err err;
                 }) catch |err| switch (err) {
                     error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
                         @tagName(air_tag),
@@ -21126,994 +21169,502 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
             .cmp_eq_optimized,
             .cmp_neq,
             .cmp_neq_optimized,
-            => |air_tag| if (use_old) try cg.airCmp(inst, switch (air_tag) {
-                else => unreachable,
-                .cmp_eq, .cmp_eq_optimized => .eq,
-                .cmp_neq, .cmp_neq_optimized => .neq,
-            }) else fallback: {
+            => |air_tag| if (use_old) try cg.airCmp(inst, air_tag.toCmpOp().?) else {
                 const bin_op = air_datas[@intFromEnum(inst)].bin_op;
-                const scalar_ty = cg.typeOf(bin_op.lhs).scalarType(zcu);
-                if (cg.intInfo(scalar_ty) == null and cg.floatBits(scalar_ty) == null) break :fallback try cg.airCmp(inst, switch (air_tag) {
-                    else => unreachable,
-                    .cmp_eq, .cmp_eq_optimized => .eq,
-                    .cmp_neq, .cmp_neq_optimized => .neq,
-                });
+                const cmp_op = air_tag.toCmpOp().?;
                 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
+                const ty = cg.typeOf(bin_op.lhs);
                 var res: [1]Temp = undefined;
-                cg.select(&res, &.{.bool}, &ops, switch (@as(Condition, switch (air_tag) {
-                    else => unreachable,
-                    .cmp_eq, .cmp_eq_optimized => .e,
-                    .cmp_neq, .cmp_neq_optimized => .ne,
-                })) {
-                    else => unreachable,
-                    inline .e, .ne => |cc| comptime &.{ .{
-                        .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
-                        .patterns = &.{
-                            .{ .src = .{ .mem, .imm8 } },
-                            .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_gpr, .imm8 } },
-                            .{ .src = .{ .imm8, .to_gpr }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_gpr, .mem } },
-                            .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_gpr, .to_gpr } },
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
-                        } },
-                    }, .{
-                        .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
-                        .patterns = &.{
-                            .{ .src = .{ .mem, .imm16 } },
-                            .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_gpr, .imm16 } },
-                            .{ .src = .{ .imm16, .to_gpr }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_gpr, .mem } },
-                            .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_gpr, .to_gpr } },
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
-                        } },
-                    }, .{
-                        .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
-                        .patterns = &.{
-                            .{ .src = .{ .mem, .imm32 } },
-                            .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_gpr, .imm32 } },
-                            .{ .src = .{ .imm32, .to_gpr }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_gpr, .mem } },
-                            .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_gpr, .to_gpr } },
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .@"64bit", null, null, null },
-                        .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
-                        .patterns = &.{
-                            .{ .src = .{ .mem, .simm32 } },
-                            .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_gpr, .simm32 } },
-                            .{ .src = .{ .simm32, .to_gpr }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_gpr, .mem } },
-                            .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_gpr, .to_gpr } },
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .sse, .mmx, null, null },
-                        .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
-                        .patterns = &.{
-                            .{ .src = .{ .to_mut_mm, .mem } },
-                            .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_mut_mm, .to_mm } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
-                            .{ .kind = .{ .rc = .mmx } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ },
-                            .{ ._, .p_, .xor, .src0q, .src1q, ._, ._ },
-                            .{ ._, .p_b, .cmpeq, .tmp1q, .src0q, ._, ._ },
-                            .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ },
-                            .{ ._, ._, .xor, .tmp0d, .si(0xff), ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .avx, null, null, null },
-                        .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } },
-                        .patterns = &.{
-                            .{ .src = .{ .to_xmm, .mem } },
-                            .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_xmm, .to_xmm } },
-                        },
-                        .extra_temps = .{
-                            .{ .kind = .{ .rc = .sse } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, .vp_, .xor, .tmp0x, .src0x, .src1x, ._ },
-                            .{ ._, .vp_, .@"test", .tmp0x, .tmp0x, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .sse4_1, null, null, null },
-                        .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } },
-                        .patterns = &.{
-                            .{ .src = .{ .to_mut_xmm, .mem } },
-                            .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_mut_xmm, .to_xmm } },
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ },
-                            .{ ._, .p_, .@"test", .src0x, .src0x, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .sse2, null, null, null },
-                        .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } },
-                        .patterns = &.{
-                            .{ .src = .{ .to_mut_xmm, .mem } },
-                            .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_mut_xmm, .to_xmm } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
-                            .{ .kind = .{ .rc = .sse } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
-                            .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ },
-                            .{ ._, .p_b, .cmpeq, .tmp1x, .src0x, ._, ._ },
-                            .{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ },
-                            .{ ._, ._, .xor, .tmp0d, .si(0xffff), ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .avx2, null, null, null },
-                        .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } },
-                        .patterns = &.{
-                            .{ .src = .{ .to_ymm, .mem } },
-                            .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_ymm, .to_ymm } },
+                const OptInfo = struct { // bookkeeping for the optional-operand path; `opt_info` stays null for float/int operands
+                    deaths: [2]Air.Inst.Index, // `index` of each operand's payload temp, recorded while the optional is unwrapped
+                    res: [1]Temp, // result slot passed to the `select` that compares the two `has_value` flags
+                    state: State, // NOTE(review): not assigned in the visible lines; presumably a saved codegen state — confirm
+                    reloc: Mir.Inst.Index, // NOTE(review): not assigned in the visible lines; presumably a jump to patch later — confirm
+                };
+                var opt_info: ?OptInfo = null;
+                (err: switch (@as(enum { float, int }, if (cg.floatBits(ty)) |_|
+                    .float
+                else if (cg.intInfo(ty)) |_|
+                    .int
+                else category: {
+                    const child_ty = ty.optionalChild(zcu);
+                    const has_value_off: u31 = @intCast(child_ty.abiSize(zcu));
+                    var has_values: [2]Temp = undefined;
+                    opt_info = @as(OptInfo, undefined);
+                    for (&has_values, &ops, &opt_info.?.deaths) |*has_value, *op, *death| {
+                        has_value.* = try op.read(.bool, .{ .disp = has_value_off }, cg);
+                        const child = try op.read(child_ty, .{}, cg);
+                        try op.die(cg);
+                        op.* = child;
+                        death.* = child.index;
+                    }
+                    cg.select(
+                        &opt_info.?.res,
+                        &.{.bool},
+                        &has_values,
+                        switch (Condition.fromCompareOperatorUnsigned(cmp_op)) {
+                            else => unreachable,
+                            inline .e, .ne => |cc| comptime &.{.{
+                                .src_constraints = .{ .{ .size = .byte }, .{ .size = .byte } },
+                                .patterns = &.{
+                                    .{ .src = .{ .mem, .imm8 } },
+                                    .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } },
+                                    .{ .src = .{ .to_gpr, .imm8 } },
+                                    .{ .src = .{ .imm8, .to_gpr }, .commute = .{ 0, 1 } },
+                                    .{ .src = .{ .mem, .to_gpr } },
+                                    .{ .src = .{ .to_gpr, .mem }, .commute = .{ 0, 1 } },
+                                    .{ .src = .{ .to_gpr, .to_gpr } },
+                                },
+                                .dst_temps = .{.{ .rc = .general_purpose }},
+                                .clobbers = .{ .eflags = true },
+                                .each = .{ .once = &.{
+                                    .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+                                    .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                                    .{ ._, .fromCond(cc), .set, .dst0b, ._, ._, ._ },
+                                    .{ ._, ._, .@"test", .src0b, .src1b, ._, ._ },
+                                } },
+                            }},
                         },
-                        .extra_temps = .{
-                            .{ .kind = .{ .rc = .sse } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, .vp_, .xor, .tmp0y, .src0y, .src1y, ._ },
-                            .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .avx, null, null, null },
-                        .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } },
-                        .patterns = &.{
-                            .{ .src = .{ .to_ymm, .mem } },
-                            .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_ymm, .to_ymm } },
-                        },
-                        .extra_temps = .{
-                            .{ .kind = .{ .rc = .sse } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, .v_pd, .xor, .tmp0y, .src0y, .src1y, ._ },
-                            .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .avx2, null, null, null },
-                        .src_constraints = .{
-                            .{ .remainder_int = .{ .of = .yword, .is = .xword } },
-                            .{ .remainder_int = .{ .of = .yword, .is = .xword } },
-                        },
-                        .patterns = &.{
-                            .{ .src = .{ .to_mem, .to_mem } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                            .{ .kind = .{ .rc = .sse } },
-                            .{ .kind = .{ .rc = .sse } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
-                            .{ ._, .vp_, .xor, .tmp1y, .tmp1y, .tmp1y, ._ },
-                            .{ .@"0:", .v_dqu, .mov, .tmp2y, .memiad(.src0y, .tmp0, .add_size, -16), ._, ._ },
-                            .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .memiad(.src1y, .tmp0, .add_size, -16), ._ },
-                            .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
-                            .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
-                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
-                            .{ ._, .v_dqa, .mov, .tmp2x, .memad(.src0x, .add_size, -16), ._, ._ },
-                            .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .memad(.src1x, .add_size, -16), ._ },
-                            .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
-                            .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .avx2, null, null, null },
-                        .src_constraints = .{
-                            .{ .remainder_int = .{ .of = .yword, .is = .yword } },
-                            .{ .remainder_int = .{ .of = .yword, .is = .yword } },
-                        },
-                        .patterns = &.{
-                            .{ .src = .{ .to_mem, .to_mem } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                            .{ .kind = .{ .rc = .sse } },
-                            .{ .kind = .{ .rc = .sse } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                            .{ ._, .vp_, .xor, .tmp1y, .tmp1y, .tmp1y, ._ },
-                            .{ .@"0:", .v_dqu, .mov, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
-                            .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._ },
-                            .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
-                            .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
-                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
-                            .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .avx, null, null, null },
-                        .src_constraints = .{
-                            .{ .remainder_int = .{ .of = .yword, .is = .xword } },
-                            .{ .remainder_int = .{ .of = .yword, .is = .xword } },
-                        },
-                        .patterns = &.{
-                            .{ .src = .{ .to_mem, .to_mem } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                            .{ .kind = .{ .rc = .sse } },
-                            .{ .kind = .{ .rc = .sse } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
-                            .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .tmp1y, ._ },
-                            .{ .@"0:", .v_pd, .movu, .tmp2y, .memiad(.src0y, .tmp0, .add_size, -16), ._, ._ },
-                            .{ ._, .v_pd, .xor, .tmp2y, .tmp2y, .memiad(.src1y, .tmp0, .add_size, -16), ._ },
-                            .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
-                            .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
-                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
-                            .{ ._, .v_pd, .mova, .tmp2x, .memad(.src0x, .add_size, -16), ._, ._ },
-                            .{ ._, .v_pd, .xor, .tmp2x, .tmp2x, .memad(.src1x, .add_size, -16), ._ },
-                            .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
-                            .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .avx, null, null, null },
-                        .src_constraints = .{
-                            .{ .remainder_int = .{ .of = .yword, .is = .yword } },
-                            .{ .remainder_int = .{ .of = .yword, .is = .yword } },
-                        },
-                        .patterns = &.{
-                            .{ .src = .{ .to_mem, .to_mem } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                            .{ .kind = .{ .rc = .sse } },
-                            .{ .kind = .{ .rc = .sse } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                            .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .tmp1y, ._ },
-                            .{ .@"0:", .v_pd, .movu, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
-                            .{ ._, .v_pd, .xor, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._ },
-                            .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ },
-                            .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
-                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
-                            .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .avx, null, null, null },
-                        .src_constraints = .{
-                            .{ .remainder_int = .{ .of = .xword, .is = .xword } },
-                            .{ .remainder_int = .{ .of = .xword, .is = .xword } },
-                        },
-                        .patterns = &.{
-                            .{ .src = .{ .to_mem, .to_mem } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                            .{ .kind = .{ .rc = .sse } },
-                            .{ .kind = .{ .rc = .sse } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                            .{ ._, .vp_, .xor, .tmp1x, .tmp1x, .tmp1x, ._ },
-                            .{ .@"0:", .v_dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
-                            .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._ },
-                            .{ ._, .vp_, .@"or", .tmp1x, .tmp1x, .tmp2x, ._ },
-                            .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
-                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
-                            .{ ._, .vp_, .@"test", .tmp1x, .tmp1x, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .sse4_1, null, null, null },
-                        .src_constraints = .{
-                            .{ .remainder_int = .{ .of = .xword, .is = .xword } },
-                            .{ .remainder_int = .{ .of = .xword, .is = .xword } },
-                        },
-                        .patterns = &.{
-                            .{ .src = .{ .to_mem, .to_mem } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                            .{ .kind = .{ .rc = .sse } },
-                            .{ .kind = .{ .rc = .sse } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                            .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
-                            .{ .@"0:", ._dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
-                            .{ ._, .p_, .xor, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
-                            .{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ },
-                            .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
-                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
-                            .{ ._, .p_, .@"test", .tmp1x, .tmp1x, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .sse2, null, null, null },
-                        .src_constraints = .{
-                            .{ .remainder_int = .{ .of = .xword, .is = .xword } },
-                            .{ .remainder_int = .{ .of = .xword, .is = .xword } },
-                        },
-                        .patterns = &.{
-                            .{ .src = .{ .to_mem, .to_mem } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                            .{ .kind = .{ .rc = .sse } },
-                            .{ .kind = .{ .rc = .sse } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                            .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
-                            .{ .@"0:", ._dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
-                            .{ ._, .p_, .xor, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
-                            .{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ },
-                            .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
-                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
-                            .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ },
-                            .{ ._, .p_b, .cmpeq, .tmp1x, .tmp2x, ._, ._ },
-                            .{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ },
-                            .{ ._, ._, .cmp, .tmp0d, .si(0xffff), ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .sse, .mmx, null, null },
-                        .src_constraints = .{
-                            .{ .remainder_int = .{ .of = .qword, .is = .qword } },
-                            .{ .remainder_int = .{ .of = .qword, .is = .qword } },
-                        },
-                        .patterns = &.{
-                            .{ .src = .{ .to_mem, .to_mem } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                            .{ .kind = .{ .rc = .mmx } },
-                            .{ .kind = .{ .rc = .mmx } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                            .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ },
-                            .{ .@"0:", ._q, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                            .{ ._, .p_, .xor, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
-                            .{ ._, .p_, .@"or", .tmp1q, .tmp2q, ._, ._ },
-                            .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
-                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
-                            .{ ._, .p_, .xor, .tmp2q, .tmp2q, ._, ._ },
-                            .{ ._, .p_b, .cmpeq, .tmp1q, .tmp2q, ._, ._ },
-                            .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ },
-                            .{ ._, ._, .cmp, .tmp0d, .si(0xff), ._, ._ },
-                        } },
-                    }, .{
-                        .src_constraints = .{
-                            .{ .remainder_int = .{ .of = .qword, .is = .qword } },
-                            .{ .remainder_int = .{ .of = .qword, .is = .qword } },
-                        },
-                        .patterns = &.{
-                            .{ .src = .{ .to_mem, .to_mem } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                            .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
-                            .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                            .{ ._, ._, .xor, .tmp1p, .tmp1p, ._, ._ },
-                            .{ .@"0:", ._, .mov, .tmp2p, .memia(.src0p, .tmp0, .add_size), ._, ._ },
-                            .{ ._, ._, .xor, .tmp2p, .memia(.src1p, .tmp0, .add_size), ._, ._ },
-                            .{ ._, ._, .@"or", .tmp1p, .tmp2p, ._, ._ },
-                            .{ ._, ._, .add, .tmp0p, .sa(.tmp2, .add_size), ._, ._ },
-                            .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
-                            .{ ._, ._, .@"test", .tmp1p, .tmp1p, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .f16c, null, null, null },
-                        .src_constraints = .{ .{ .float = .word }, .{ .float = .word } },
-                        .patterns = &.{
-                            .{ .src = .{ .to_sse, .to_sse } },
-                        },
-                        .extra_temps = .{
-                            .{ .kind = .{ .mut_rc = .{ .ref = .src0, .rc = .sse } } },
-                            .{ .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = switch (cc) {
-                            else => unreachable,
-                            .e => .z_and_np,
-                            .ne => .nz_or_p,
-                        } }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, .v_ps, .cvtph2, .tmp0x, .src0q, ._, ._ },
-                            .{ ._, .v_ps, .cvtph2, .tmp1x, .src1q, ._, ._ },
-                            .{ ._, .v_ss, .ucomi, .tmp0x, .tmp1x, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .sse, null, null, null },
-                        .src_constraints = .{ .{ .float = .word }, .{ .float = .word } },
-                        .patterns = &.{
-                            .{ .src = .{ .{ .to_reg = .xmm0 }, .{ .to_reg = .xmm1 } } },
-                        },
-                        .call_frame = .{ .alignment = .@"16" },
-                        .extra_temps = .{
-                            .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
-                                else => unreachable,
-                                .e => "__eqhf2",
-                                .ne => "__nehf2",
-                            } } } },
-                            .{ .type = .i32, .kind = .{ .reg = .eax } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
-                            .{ ._, ._, .@"test", .tmp1d, .tmp1d, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .avx, null, null, null },
-                        .src_constraints = .{ .{ .float = .dword }, .{ .float = .dword } },
-                        .patterns = &.{
-                            .{ .src = .{ .to_sse, .mem } },
-                            .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_sse, .to_sse } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .f16, .kind = .{ .rc = .sse } },
-                            .{ .type = .f16, .kind = .{ .rc = .sse } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = switch (cc) {
-                            else => unreachable,
-                            .e => .z_and_np,
-                            .ne => .nz_or_p,
-                        } }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, .v_ss, .ucomi, .src0x, .src1d, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .sse, null, null, null },
-                        .src_constraints = .{ .{ .float = .dword }, .{ .float = .dword } },
-                        .patterns = &.{
-                            .{ .src = .{ .to_sse, .mem } },
-                            .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_sse, .to_sse } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .f16, .kind = .{ .rc = .sse } },
-                            .{ .type = .f16, .kind = .{ .rc = .sse } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = switch (cc) {
-                            else => unreachable,
-                            .e => .z_and_np,
-                            .ne => .nz_or_p,
-                        } }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._ss, .ucomi, .src0x, .src1d, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .sse, null, null, null },
-                        .src_constraints = .{ .{ .float = .dword }, .{ .float = .word } },
-                        .patterns = &.{
-                            .{ .src = .{ .{ .to_reg = .xmm0 }, .{ .to_reg = .xmm1 } } },
-                        },
-                        .call_frame = .{ .alignment = .@"16" },
-                        .extra_temps = .{
-                            .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
-                                else => unreachable,
-                                .e => "__eqsf2",
-                                .ne => "__nesf2",
-                            } } } },
-                            .{ .type = .i32, .kind = .{ .reg = .eax } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
-                            .{ ._, ._, .@"test", .tmp1d, .tmp1d, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .avx, null, null, null },
-                        .src_constraints = .{ .{ .float = .qword }, .{ .float = .qword } },
-                        .patterns = &.{
-                            .{ .src = .{ .to_sse, .mem } },
-                            .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_sse, .to_sse } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .f16, .kind = .{ .rc = .sse } },
-                            .{ .type = .f16, .kind = .{ .rc = .sse } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = switch (cc) {
-                            else => unreachable,
-                            .e => .z_and_np,
-                            .ne => .nz_or_p,
-                        } }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, .v_sd, .ucomi, .src0x, .src1q, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .sse2, null, null, null },
-                        .src_constraints = .{ .{ .float = .qword }, .{ .float = .qword } },
-                        .patterns = &.{
-                            .{ .src = .{ .to_sse, .mem } },
-                            .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .to_sse, .to_sse } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .f16, .kind = .{ .rc = .sse } },
-                            .{ .type = .f16, .kind = .{ .rc = .sse } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = switch (cc) {
-                            else => unreachable,
-                            .e => .z_and_np,
-                            .ne => .nz_or_p,
-                        } }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, ._sd, .ucomi, .src0x, .src1q, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .sse, null, null, null },
-                        .src_constraints = .{ .{ .float = .qword }, .{ .float = .qword } },
-                        .patterns = &.{
-                            .{ .src = .{ .{ .to_reg = .xmm0 }, .{ .to_reg = .xmm1 } } },
-                        },
-                        .call_frame = .{ .alignment = .@"16" },
-                        .extra_temps = .{
-                            .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
-                                else => unreachable,
-                                .e => "__eqdf2",
-                                .ne => "__nedf2",
-                            } } } },
-                            .{ .type = .i32, .kind = .{ .reg = .eax } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
-                            .{ ._, ._, .@"test", .tmp1d, .tmp1d, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .x87, .cmov, null, null },
-                        .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
-                        .patterns = &.{
-                            .{ .src = .{ .to_x87, .mem }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .mem, .to_x87 } },
-                            .{ .src = .{ .to_x87, .to_x87 } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = switch (cc) {
-                            else => unreachable,
-                            .e => .z_and_np,
-                            .ne => .nz_or_p,
-                        } }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
-                            .{ ._, .f_p, .ucomi, .tmp0t, .src1t, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .sahf, .x87, null, null },
-                        .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
-                        .patterns = &.{
-                            .{ .src = .{ .mem, .mem } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .f80, .kind = .{ .reg = .st6 } },
-                            .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                            .{ .type = .u8, .kind = .{ .reg = .ah } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = switch (cc) {
-                            else => unreachable,
-                            .e => .z_and_np,
-                            .ne => .nz_or_p,
-                        } }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, .f_, .ld, .src1t, ._, ._, ._ },
-                            .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
-                            .{ ._, .f_pp, .ucom, ._, ._, ._, ._ },
-                            .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
-                            .{ ._, ._, .sahf, ._, ._, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .@"64bit", .x87, null, null },
-                        .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
-                        .patterns = &.{
-                            .{ .src = .{ .mem, .mem } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .f80, .kind = .{ .reg = .st6 } },
-                            .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                            .{ .type = .u8, .kind = .{ .reg = .ah } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, .f_, .ld, .src1t, ._, ._, ._ },
-                            .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
-                            .{ ._, .f_pp, .ucom, ._, ._, ._, ._ },
-                            .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
-                            .{ ._, ._, .xor, .tmp2b, .si(0b0_1_000_000), ._, ._ },
-                            .{ ._, ._, .@"test", .tmp2b, .si(0b0_1_000_100), ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .x87, null, null, null },
-                        .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
-                        .patterns = &.{
-                            .{ .src = .{ .mem, .mem } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .f80, .kind = .{ .reg = .st6 } },
-                            .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                            .{ .type = .u8, .kind = .{ .reg = .ah } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = switch (cc) {
-                            else => unreachable,
-                            .e => .z_and_np,
-                            .ne => .nz_or_p,
-                        } }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, .f_, .ld, .src1t, ._, ._, ._ },
-                            .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
-                            .{ ._, .f_pp, .ucom, ._, ._, ._, ._ },
-                            .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
-                            .{ ._, ._, .sahf, ._, ._, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .sahf, .x87, null, null },
-                        .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
-                        .patterns = &.{
-                            .{ .src = .{ .to_x87, .mem }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .mem, .to_x87 } },
-                            .{ .src = .{ .to_x87, .to_x87 } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .f80, .kind = .{ .reg = .st6 } },
-                            .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                            .{ .type = .u16, .kind = .{ .reg = .ah } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = switch (cc) {
+                    ) catch |err| switch (err) {
+                        error.SelectFailed => unreachable,
+                        else => |e| return e,
+                    };
+                    for (has_values) |has_value| for (opt_info.?.res) |opt_res| {
+                        if (has_value.index == opt_res.index) break;
+                    } else try has_value.die(cg);
+                    opt_info.?.state = cg.initRetroactiveState();
+                    opt_info.?.state.next_temp_index = cg.next_temp_index;
+                    var state = try cg.saveState();
+                    state.next_temp_index = cg.next_temp_index;
+                    for (ops) |op| try op.die(cg);
+                    try cg.saveRetroactiveState(&opt_info.?.state);
+                    opt_info.?.reloc = try cg.asmJccReloc(.z, undefined);
+                    try cg.restoreState(state, &.{}, .{
+                        .emit_instructions = false,
+                        .update_tracking = true,
+                        .resurrect = true,
+                        .close_scope = true,
+                    });
+                    break :category if (cg.floatBits(child_ty)) |_| .float else .int;
+                })) {
+                    .float => {
+                        cg.select(&res, &.{.bool}, &ops, switch (switch (air_tag) {
                             else => unreachable,
-                            .e => .z_and_np,
-                            .ne => .nz_or_p,
-                        } }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
-                            .{ ._, .f_p, .ucom, .src1t, ._, ._, ._ },
-                            .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
-                            .{ ._, ._, .sahf, ._, ._, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .@"64bit", .x87, null, null },
-                        .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
-                        .patterns = &.{
-                            .{ .src = .{ .to_x87, .mem }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .mem, .to_x87 } },
-                            .{ .src = .{ .to_x87, .to_x87 } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .f80, .kind = .{ .reg = .st6 } },
-                            .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                            .{ .type = .u8, .kind = .{ .reg = .ah } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
-                            .{ ._, .f_p, .ucom, .src1t, ._, ._, ._ },
-                            .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
-                            .{ ._, ._, .xor, .tmp2b, .si(0b0_1_000_000), ._, ._ },
-                            .{ ._, ._, .@"test", .tmp2b, .si(0b0_1_000_100), ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .x87, null, null, null },
-                        .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
-                        .patterns = &.{
-                            .{ .src = .{ .to_x87, .mem }, .commute = .{ 0, 1 } },
-                            .{ .src = .{ .mem, .to_x87 } },
-                            .{ .src = .{ .to_x87, .to_x87 } },
-                        },
-                        .extra_temps = .{
-                            .{ .type = .f80, .kind = .{ .reg = .st6 } },
-                            .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                            .{ .type = .u16, .kind = .{ .reg = .ah } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = switch (cc) {
+                            .cmp_eq, .cmp_neq => false,
+                            .cmp_eq_optimized, .cmp_neq_optimized => true,
+                        }) {
+                            inline false, true => |optimized| comptime &.{ .{
+                                .required_features = .{ .f16c, null, null, null },
+                                .src_constraints = .{ .{ .float = .word }, .{ .float = .word } },
+                                .patterns = &.{
+                                    .{ .src = .{ .to_sse, .to_sse } },
+                                },
+                                .extra_temps = .{
+                                    .{ .kind = .{ .mut_rc = .{ .ref = .src0, .rc = .sse } } },
+                                    .{ .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } },
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                },
+                                .dst_temps = .{.{ .cc = switch (optimized) {
+                                    false => .z_and_np,
+                                    true => .z,
+                                } }},
+                                .clobbers = .{ .eflags = true },
+                                .each = .{ .once = &.{
+                                    .{ ._, .v_ps, .cvtph2, .tmp0x, .src0q, ._, ._ },
+                                    .{ ._, .v_ps, .cvtph2, .tmp1x, .src1q, ._, ._ },
+                                    .{ ._, .v_ss, .ucomi, .tmp0x, .tmp1x, ._, ._ },
+                                } },
+                            }, .{
+                                .required_features = .{ .sse, null, null, null },
+                                .src_constraints = .{ .{ .float = .word }, .{ .float = .word } },
+                                .patterns = &.{
+                                    .{ .src = .{ .{ .to_reg = .xmm0 }, .{ .to_reg = .xmm1 } } },
+                                },
+                                .call_frame = .{ .alignment = .@"16" },
+                                .extra_temps = .{
+                                    .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__cmphf2" } } },
+                                    .{ .type = .i32, .kind = .{ .reg = .eax } },
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                },
+                                .dst_temps = .{.{ .cc = .z }},
+                                .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                                .each = .{ .once = &.{
+                                    .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+                                    .{ ._, ._, .@"test", .tmp1d, .tmp1d, ._, ._ },
+                                } },
+                            }, .{
+                                .required_features = .{ .avx, null, null, null },
+                                .src_constraints = .{ .{ .float = .dword }, .{ .float = .dword } },
+                                .patterns = &.{
+                                    .{ .src = .{ .to_sse, .mem } },
+                                    .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                                    .{ .src = .{ .to_sse, .to_sse } },
+                                },
+                                .dst_temps = .{.{ .cc = switch (optimized) {
+                                    false => .z_and_np,
+                                    true => .z,
+                                } }},
+                                .clobbers = .{ .eflags = true },
+                                .each = .{ .once = &.{
+                                    .{ ._, .v_ss, .ucomi, .src0x, .src1d, ._, ._ },
+                                } },
+                            }, .{
+                                .required_features = .{ .sse, null, null, null },
+                                .src_constraints = .{ .{ .float = .dword }, .{ .float = .dword } },
+                                .patterns = &.{
+                                    .{ .src = .{ .to_sse, .mem } },
+                                    .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                                    .{ .src = .{ .to_sse, .to_sse } },
+                                },
+                                .dst_temps = .{.{ .cc = switch (optimized) {
+                                    false => .z_and_np,
+                                    true => .z,
+                                } }},
+                                .clobbers = .{ .eflags = true },
+                                .each = .{ .once = &.{
+                                    .{ ._, ._ss, .ucomi, .src0x, .src1d, ._, ._ },
+                                } },
+                            }, .{
+                                .required_features = .{ .avx, null, null, null },
+                                .src_constraints = .{ .{ .float = .qword }, .{ .float = .qword } },
+                                .patterns = &.{
+                                    .{ .src = .{ .to_sse, .mem } },
+                                    .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                                    .{ .src = .{ .to_sse, .to_sse } },
+                                },
+                                .dst_temps = .{.{ .cc = switch (optimized) {
+                                    false => .z_and_np,
+                                    true => .z,
+                                } }},
+                                .clobbers = .{ .eflags = true },
+                                .each = .{ .once = &.{
+                                    .{ ._, .v_sd, .ucomi, .src0x, .src1q, ._, ._ },
+                                } },
+                            }, .{
+                                .required_features = .{ .sse2, null, null, null },
+                                .src_constraints = .{ .{ .float = .qword }, .{ .float = .qword } },
+                                .patterns = &.{
+                                    .{ .src = .{ .to_sse, .mem } },
+                                    .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                                    .{ .src = .{ .to_sse, .to_sse } },
+                                },
+                                .dst_temps = .{.{ .cc = switch (optimized) {
+                                    false => .z_and_np,
+                                    true => .z,
+                                } }},
+                                .clobbers = .{ .eflags = true },
+                                .each = .{ .once = &.{
+                                    .{ ._, ._sd, .ucomi, .src0x, .src1q, ._, ._ },
+                                } },
+                            }, .{
+                                .required_features = .{ .x87, .cmov, null, null },
+                                .src_constraints = .{ .{ .float = .qword }, .{ .float = .qword } },
+                                .patterns = &.{
+                                    .{ .src = .{ .to_mem, .to_mem }, .commute = .{ 0, 1 } },
+                                },
+                                .extra_temps = .{
+                                    .{ .type = .f80, .kind = .{ .reg = .st6 } },
+                                    .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                },
+                                .dst_temps = .{.{ .cc = switch (optimized) {
+                                    false => .z_and_np,
+                                    true => .z,
+                                } }},
+                                .clobbers = .{ .eflags = true },
+                                .each = .{ .once = &.{
+                                    .{ ._, .f_, .ld, .src1q, ._, ._, ._ },
+                                    .{ ._, .f_, .ld, .src0q, ._, ._, ._ },
+                                    .{ ._, .f_p, .ucomi, .tmp0t, .tmp1t, ._, ._ },
+                                    .{ ._, .f_p, .st, .tmp1t, ._, ._, ._ },
+                                } },
+                            }, .{
+                                .required_features = .{ .sahf, .x87, null, null },
+                                .src_constraints = .{ .{ .float = .qword }, .{ .float = .qword } },
+                                .patterns = &.{
+                                    .{ .src = .{ .to_mem, .to_mem }, .commute = .{ 0, 1 } },
+                                },
+                                .extra_temps = .{
+                                    .{ .type = .f80, .kind = .{ .reg = .st6 } },
+                                    .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                    .{ .type = .u8, .kind = .{ .reg = .ah } },
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                },
+                                .dst_temps = .{.{ .cc = switch (optimized) {
+                                    false => .z_and_np,
+                                    true => .z,
+                                } }},
+                                .clobbers = .{ .eflags = true },
+                                .each = .{ .once = &.{
+                                    .{ ._, .f_, .ld, .src1q, ._, ._, ._ },
+                                    .{ ._, .f_, .ld, .src0q, ._, ._, ._ },
+                                    .{ ._, .f_pp, .ucom, ._, ._, ._, ._ },
+                                    .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
+                                    .{ ._, ._, .sahf, ._, ._, ._, ._ },
+                                } },
+                            }, .{
+                                .required_features = .{ .x87, null, null, null },
+                                .src_constraints = .{ .{ .float = .qword }, .{ .float = .qword } },
+                                .patterns = &.{
+                                    .{ .src = .{ .to_mem, .to_mem } },
+                                },
+                                .extra_temps = .{
+                                    .{ .type = .f80, .kind = .{ .reg = .st6 } },
+                                    .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                    .{ .type = .u8, .kind = .{ .reg = .ah } },
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                },
+                                .dst_temps = .{.{ .cc = switch (optimized) {
+                                    false => .z,
+                                    true => .nz,
+                                } }},
+                                .clobbers = .{ .eflags = true },
+                                .each = .{ .once = switch (optimized) {
+                                    false => &.{
+                                        .{ ._, .f_, .ld, .src1q, ._, ._, ._ },
+                                        .{ ._, .f_, .ld, .src0q, ._, ._, ._ },
+                                        .{ ._, .f_pp, .ucom, ._, ._, ._, ._ },
+                                        .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
+                                        .{ ._, ._, .xor, .tmp2b, .si(0b1_000_000), ._, ._ },
+                                        .{ ._, ._, .@"test", .tmp2b, .si(0b1_000_100), ._, ._ },
+                                    },
+                                    true => &.{
+                                        .{ ._, .f_, .ld, .src1q, ._, ._, ._ },
+                                        .{ ._, .f_, .ld, .src0q, ._, ._, ._ },
+                                        .{ ._, .f_pp, .ucom, ._, ._, ._, ._ },
+                                        .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
+                                        .{ ._, ._, .@"test", .tmp2b, .si(0b1_000_000), ._, ._ },
+                                    },
+                                } },
+                            }, .{
+                                .required_features = .{ .x87, .cmov, null, null },
+                                .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
+                                .patterns = &.{
+                                    .{ .src = .{ .to_x87, .mem }, .commute = .{ 0, 1 } },
+                                    .{ .src = .{ .mem, .to_x87 } },
+                                    .{ .src = .{ .to_x87, .to_x87 } },
+                                },
+                                .extra_temps = .{
+                                    .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                },
+                                .dst_temps = .{.{ .cc = switch (optimized) {
+                                    false => .z_and_np,
+                                    true => .z,
+                                } }},
+                                .clobbers = .{ .eflags = true },
+                                .each = .{ .once = &.{
+                                    .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
+                                    .{ ._, .f_p, .ucomi, .tmp0t, .src1t, ._, ._ },
+                                } },
+                            }, .{
+                                .required_features = .{ .sahf, .x87, null, null },
+                                .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
+                                .patterns = &.{
+                                    .{ .src = .{ .mem, .mem } },
+                                },
+                                .extra_temps = .{
+                                    .{ .type = .f80, .kind = .{ .reg = .st6 } },
+                                    .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                    .{ .type = .u8, .kind = .{ .reg = .ah } },
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                },
+                                .dst_temps = .{.{ .cc = switch (optimized) {
+                                    false => .z_and_np,
+                                    true => .z,
+                                } }},
+                                .clobbers = .{ .eflags = true },
+                                .each = .{ .once = &.{
+                                    .{ ._, .f_, .ld, .src1t, ._, ._, ._ },
+                                    .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
+                                    .{ ._, .f_pp, .ucom, ._, ._, ._, ._ },
+                                    .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
+                                    .{ ._, ._, .sahf, ._, ._, ._, ._ },
+                                } },
+                            }, .{
+                                .required_features = .{ .sahf, .x87, null, null },
+                                .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
+                                .patterns = &.{
+                                    .{ .src = .{ .to_x87, .mem }, .commute = .{ 0, 1 } },
+                                    .{ .src = .{ .mem, .to_x87 } },
+                                    .{ .src = .{ .to_x87, .to_x87 } },
+                                },
+                                .extra_temps = .{
+                                    .{ .type = .f80, .kind = .{ .reg = .st6 } },
+                                    .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                    .{ .type = .u8, .kind = .{ .reg = .ah } },
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                },
+                                .dst_temps = .{.{ .cc = switch (optimized) {
+                                    false => .z_and_np,
+                                    true => .z,
+                                } }},
+                                .clobbers = .{ .eflags = true },
+                                .each = .{ .once = &.{
+                                    .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
+                                    .{ ._, .f_p, .ucom, .src1t, ._, ._, ._ },
+                                    .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
+                                    .{ ._, ._, .sahf, ._, ._, ._, ._ },
+                                } },
+                            }, .{
+                                .required_features = .{ .x87, null, null, null },
+                                .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
+                                .patterns = &.{
+                                    .{ .src = .{ .mem, .mem } },
+                                },
+                                .extra_temps = .{
+                                    .{ .type = .f80, .kind = .{ .reg = .st6 } },
+                                    .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                    .{ .type = .u8, .kind = .{ .reg = .ah } },
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                },
+                                .dst_temps = .{.{ .cc = switch (optimized) {
+                                    false => .z,
+                                    true => .nz,
+                                } }},
+                                .clobbers = .{ .eflags = true },
+                                .each = .{ .once = switch (optimized) {
+                                    false => &.{
+                                        .{ ._, .f_, .ld, .src1t, ._, ._, ._ },
+                                        .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
+                                        .{ ._, .f_pp, .ucom, ._, ._, ._, ._ },
+                                        .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
+                                        .{ ._, ._, .xor, .tmp2b, .si(0b1_000_000), ._, ._ },
+                                        .{ ._, ._, .@"test", .tmp2b, .si(0b1_000_100), ._, ._ },
+                                    },
+                                    true => &.{
+                                        .{ ._, .f_, .ld, .src1t, ._, ._, ._ },
+                                        .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
+                                        .{ ._, .f_pp, .ucom, ._, ._, ._, ._ },
+                                        .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
+                                        .{ ._, ._, .@"test", .tmp2b, .si(0b1_000_000), ._, ._ },
+                                    },
+                                } },
+                            }, .{
+                                .required_features = .{ .x87, null, null, null },
+                                .src_constraints = .{ .{ .float = .tbyte }, .{ .float = .tbyte } },
+                                .patterns = &.{
+                                    .{ .src = .{ .to_x87, .mem }, .commute = .{ 0, 1 } },
+                                    .{ .src = .{ .mem, .to_x87 } },
+                                    .{ .src = .{ .to_x87, .to_x87 } },
+                                },
+                                .extra_temps = .{
+                                    .{ .type = .f80, .kind = .{ .reg = .st6 } },
+                                    .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                                    .{ .type = .u8, .kind = .{ .reg = .ah } },
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                },
+                                .dst_temps = .{.{ .cc = switch (optimized) {
+                                    false => .z,
+                                    true => .nz,
+                                } }},
+                                .clobbers = .{ .eflags = true },
+                                .each = .{ .once = switch (optimized) {
+                                    false => &.{
+                                        .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
+                                        .{ ._, .f_p, .ucom, .src1t, ._, ._, ._ },
+                                        .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
+                                        .{ ._, ._, .xor, .tmp2b, .si(0b1_000_000), ._, ._ },
+                                        .{ ._, ._, .@"test", .tmp2b, .si(0b1_000_100), ._, ._ },
+                                    },
+                                    true => &.{
+                                        .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
+                                        .{ ._, .f_p, .ucom, .src1t, ._, ._, ._ },
+                                        .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
+                                        .{ ._, ._, .@"test", .tmp2b, .si(0b1_000_000), ._, ._ },
+                                    },
+                                } },
+                            }, .{
+                                .required_features = .{ .sse, null, null, null },
+                                .src_constraints = .{ .{ .float = .xword }, .{ .float = .xword } },
+                                .patterns = &.{
+                                    .{ .src = .{ .{ .to_reg = .xmm0 }, .{ .to_reg = .xmm1 } } },
+                                },
+                                .call_frame = .{ .alignment = .@"16" },
+                                .extra_temps = .{
+                                    .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__cmptf2" } } },
+                                    .{ .type = .i32, .kind = .{ .reg = .eax } },
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                    .unused,
+                                },
+                                .dst_temps = .{.{ .cc = .z }},
+                                .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                                .each = .{ .once = &.{
+                                    .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
+                                    .{ ._, ._, .@"test", .tmp1d, .tmp1d, ._, ._ },
+                                } },
+                            } },
+                        }) catch |err| break :err err;
+                        switch (cmp_op) {
                             else => unreachable,
-                            .e => .z_and_np,
-                            .ne => .nz_or_p,
-                        } }},
-                        .clobbers = .{ .eflags = true },
-                        .each = .{ .once = &.{
-                            .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
-                            .{ ._, .f_p, .ucom, .src1t, ._, ._, ._ },
-                            .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
-                            .{ ._, ._, .sahf, ._, ._, ._, ._ },
-                        } },
-                    }, .{
-                        .required_features = .{ .sse, null, null, null },
-                        .src_constraints = .{ .{ .float = .xword }, .{ .float = .xword } },
-                        .patterns = &.{
-                            .{ .src = .{ .{ .to_reg = .xmm0 }, .{ .to_reg = .xmm1 } } },
-                        },
-                        .call_frame = .{ .alignment = .@"16" },
-                        .extra_temps = .{
-                            .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
-                                else => unreachable,
-                                .e => "__eqtf2",
-                                .ne => "__netf2",
-                            } } } },
-                            .{ .type = .i32, .kind = .{ .reg = .eax } },
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                            .unused,
-                        },
-                        .dst_temps = .{.{ .cc = cc }},
-                        .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
-                        .each = .{ .once = &.{
-                            .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
-                            .{ ._, ._, .@"test", .tmp1d, .tmp1d, ._, ._ },
-                        } },
-                    } },
+                            .eq => {},
+                            .neq => {
+                                const cc = &res[0].unwrap(cg).temp.tracking(cg).short.eflags;
+                                cc.* = cc.negate();
+                            },
+                        }
+                    },
+                    .int => res[0] = ops[0].cmpInts(cmp_op, &ops[1], cg) catch |err| break :err err,
                 }) catch |err| switch (err) {
                     error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
                         @tagName(air_tag),
@@ -22123,6 +21674,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     }),
                     else => |e| return e,
                 };
+                if (opt_info) |*oi| {
+                    for (ops) |op| for (res) |r| {
+                        if (op.index == r.index) break;
+                    } else try op.die(cg);
+                    try cg.genCopy(.bool, oi.res[0].tracking(cg).short, res[0].tracking(cg).short, .{});
+                    try res[0].die(cg);
+                    res[0] = oi.res[0];
+                    try cg.restoreState(oi.state, &oi.deaths, .{
+                        .emit_instructions = true,
+                        .update_tracking = true,
+                        .resurrect = true,
+                        .close_scope = true,
+                    });
+                    cg.performReloc(oi.reloc);
+                    @memset(&ops, res[0]);
+                }
                 try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg);
             },
 
@@ -22169,7 +21736,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .data = .{ .func = old_inline_func },
                 });
             },
-            .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline => if (use_old) try cg.airDbgVar(inst) else if (cg.debug_output != .none) {
+            .dbg_var_ptr,
+            .dbg_var_val,
+            .dbg_arg_inline,
+            => if (use_old) try cg.airDbgVar(inst) else if (cg.debug_output != .none) {
                 const pl_op = air_datas[@intFromEnum(inst)].pl_op;
                 var ops = try cg.tempsFromOperands(inst, .{pl_op.operand});
                 var mcv = ops[0].tracking(cg).short;
@@ -22776,7 +22346,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
             .work_group_size => unreachable,
             .work_group_id => unreachable,
         }
-        cg.resetTemps();
+        try cg.resetTemps();
         cg.checkInvariantsAfterAirInst();
     }
     verbose_tracking_log.debug("{}", .{cg.fmtTracking()});
@@ -22798,9 +22368,6 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void {
             const ret_reg = param_regs[0];
             const enum_mcv = MCValue{ .register = param_regs[1] };
 
-            const epilogue_relocs = try self.gpa.alloc(Mir.Inst.Index, enum_ty.enumFieldCount(zcu));
-            defer self.gpa.free(epilogue_relocs);
-
             const data_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
             const data_lock = self.register_manager.lockRegAssumeUnused(data_reg);
             defer self.register_manager.unlockReg(data_lock);
@@ -22808,12 +22375,21 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void {
 
             var data_off: i32 = 0;
             const tag_names = enum_ty.enumFields(zcu);
-            for (epilogue_relocs, 0..) |*epilogue_reloc, tag_index| {
+            for (0..enum_ty.enumFieldCount(zcu)) |tag_index| {
+                var arg_temp = try self.tempInit(enum_ty, enum_mcv);
+
                 const tag_name_len = tag_names.get(ip)[tag_index].length(ip);
                 const tag_val = try pt.enumValueFieldIndex(enum_ty, @intCast(tag_index));
-                const tag_mcv = try self.genTypedValue(tag_val);
-                try self.genBinOpMir(.{ ._, .cmp }, enum_ty, enum_mcv, tag_mcv);
-                const skip_reloc = try self.asmJccReloc(.ne, undefined);
+                var tag_temp = try self.tempFromValue(tag_val);
+                const cc_temp = arg_temp.cmpInts(.neq, &tag_temp, self) catch |err| switch (err) {
+                    error.SelectFailed => unreachable,
+                    else => |e| return e,
+                };
+                try arg_temp.die(self);
+                try tag_temp.die(self);
+                const skip_reloc = try self.asmJccReloc(cc_temp.tracking(self).short.eflags, undefined);
+                try cc_temp.die(self);
+                try self.resetTemps();
 
                 try self.genSetMem(
                     .{ .reg = ret_reg },
@@ -22823,17 +22399,14 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void {
                     .{},
                 );
                 try self.genSetMem(.{ .reg = ret_reg }, 8, .usize, .{ .immediate = tag_name_len }, .{});
+                try self.asmOpOnly(.{ ._, .ret });
 
-                epilogue_reloc.* = try self.asmJmpReloc(undefined);
                 self.performReloc(skip_reloc);
 
                 data_off += @intCast(tag_name_len + 1);
             }
 
             try self.asmOpOnly(.{ ._2, .ud });
-
-            for (epilogue_relocs) |reloc| self.performReloc(reloc);
-            try self.asmOpOnly(.{ ._, .ret });
         },
         else => return self.fail(
             "TODO implement {s} for {}",
@@ -23103,10 +22676,12 @@ fn regClassForType(self: *CodeGen, ty: Type) Register.Class {
     const elem_ty = ty.childType(zcu);
     return if (elem_ty.toIntern() == .bool_type)
         .general_purpose
-    else if (self.floatBits(elem_ty) == 80)
-        .x87
+    else if (self.floatBits(elem_ty)) |float_bits|
+        if (float_bits == 80) .x87 else .sse
+    else if (self.intInfo(elem_ty)) |_|
+        .sse
     else
-        .sse;
+        .general_purpose;
 }
 
 fn regSetForRegClass(rc: Register.Class) RegisterManager.RegisterBitSet {
@@ -23149,14 +22724,19 @@ const State = struct {
     registers: RegisterManager.TrackedRegisters,
     reg_tracking: [RegisterManager.RegisterBitSet.bit_length]InstTracking,
     free_registers: RegisterManager.RegisterBitSet,
+    next_temp_index: Temp.Index,
     inst_tracking_len: u32,
     scope_generation: u32,
 };
 
 fn initRetroactiveState(self: *CodeGen) State {
+    const scope_generation = self.scope_generation + 1; // the generation bump that lowerBlock used to do before calling this now happens here
+    self.scope_generation = scope_generation;
+
     var state: State = undefined;
+    state.next_temp_index = @enumFromInt(0); // temp index starts at 0; fields not assigned in this function stay undefined
     state.inst_tracking_len = @intCast(self.inst_tracking.count());
-    state.scope_generation = self.scope_generation;
+    state.scope_generation = scope_generation;
     return state;
 }
 
@@ -23185,6 +22765,11 @@ fn restoreState(self: *CodeGen, state: State, deaths: []const Air.Inst.Index, co
     close_scope: bool,
 }) !void {
     if (opts.close_scope) {
+        for (
+            self.inst_tracking.keys()[@intFromEnum(state.next_temp_index)..@intFromEnum(self.next_temp_index)],
+            self.inst_tracking.values()[@intFromEnum(state.next_temp_index)..@intFromEnum(self.next_temp_index)],
+        ) |inst, *tracking| try tracking.die(self, inst);
+        self.next_temp_index = state.next_temp_index;
         for (
             self.inst_tracking.keys()[state.inst_tracking_len..],
             self.inst_tracking.values()[state.inst_tracking_len..],
@@ -23192,10 +22777,16 @@ fn restoreState(self: *CodeGen, state: State, deaths: []const Air.Inst.Index, co
         self.inst_tracking.shrinkRetainingCapacity(state.inst_tracking_len);
     }
 
-    if (opts.resurrect) for (
-        self.inst_tracking.keys()[Temp.Index.max..state.inst_tracking_len],
-        self.inst_tracking.values()[Temp.Index.max..state.inst_tracking_len],
-    ) |inst, *tracking| try tracking.resurrect(self, inst, state.scope_generation);
+    if (opts.resurrect) {
+        for (
+            self.inst_tracking.keys()[0..@intFromEnum(state.next_temp_index)],
+            self.inst_tracking.values()[0..@intFromEnum(state.next_temp_index)],
+        ) |inst, *tracking| try tracking.resurrect(self, inst, state.scope_generation);
+        for (
+            self.inst_tracking.keys()[Temp.Index.max..state.inst_tracking_len],
+            self.inst_tracking.values()[Temp.Index.max..state.inst_tracking_len],
+        ) |inst, *tracking| try tracking.resurrect(self, inst, state.scope_generation);
+    }
     for (deaths) |death| try self.processDeath(death);
 
     const ExpectedContents = [@typeInfo(RegisterManager.TrackedRegisters).array.len]RegisterLock;
@@ -27359,12 +26950,12 @@ fn airBitReverse(self: *CodeGen, inst: Air.Inst.Index) !void {
             try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0000_1111);
             try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111);
         }
-        // tmp = temp1 & 0x0F...0F
-        // dst = (temp1 >> 4) & 0x0F...0F
+        // tmp = temp1 & 0x0f...0f
+        // dst = (temp1 >> 4) & 0x0f...0f
         try self.asmRegisterImmediate(.{ ._l, .sh }, tmp, .u(4));
-        // tmp = (temp1 & 0x0F...0F) << 4
+        // tmp = (temp1 & 0x0f...0f) << 4
         try self.asmRegisterRegister(.{ ._, .@"or" }, dst, tmp);
-        // dst = temp2 = ((temp1 >> 4) & 0x0F...0F) | ((temp1 & 0x0F...0F) << 4)
+        // dst = temp2 = ((temp1 >> 4) & 0x0f...0f) | ((temp1 & 0x0f...0f) << 4)
         try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst);
         // tmp = temp2
         try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u(2));
@@ -33520,7 +33111,6 @@ fn genTry(
         if (operand.toIndex()) |operand_inst| try self.processDeath(operand_inst);
     }
 
-    self.scope_generation += 1;
     const state = try self.saveState();
 
     for (liveness_cond_br.else_deaths) |death| try self.processDeath(death);
@@ -33596,7 +33186,6 @@ fn airCondBr(self: *CodeGen, inst: Air.Inst.Index) !void {
         if (pl_op.operand.toIndex()) |op_inst| try self.processDeath(op_inst);
     }
 
-    self.scope_generation += 1;
     const state = try self.saveState();
     const reloc = try self.genCondBrMir(cond_ty, cond);
 
@@ -33978,7 +33567,6 @@ fn airLoop(self: *CodeGen, inst: Air.Inst.Index) !void {
     const loop = self.air.extraData(Air.Block, ty_pl.payload);
     const body: []const Air.Inst.Index = @ptrCast(self.air.extra[loop.end..][0..loop.data.body_len]);
 
-    self.scope_generation += 1;
     const state = try self.saveState();
 
     try self.loops.putNoClobber(self.gpa, inst, .{
@@ -33995,7 +33583,6 @@ fn lowerBlock(self: *CodeGen, inst: Air.Inst.Index, body: []const Air.Inst.Index
     const inst_tracking_i = self.inst_tracking.count();
     self.inst_tracking.putAssumeCapacityNoClobber(inst, .init(.unreach));
 
-    self.scope_generation += 1;
     try self.blocks.putNoClobber(self.gpa, inst, .{ .state = self.initRetroactiveState() });
     const liveness = self.liveness.getBlock(inst);
 
@@ -34033,6 +33620,8 @@ fn lowerSwitchBr(
 ) !void {
     const zcu = self.pt.zcu;
     const condition_ty = self.typeOf(switch_br.operand);
+    const condition_int_info = self.intInfo(condition_ty).?;
+    const condition_int_ty = try self.pt.intType(condition_int_info.signedness, condition_int_info.bits);
 
     const ExpectedContents = extern struct {
         liveness_deaths: [1 << 8 | 1]Air.Inst.Index,
@@ -34043,7 +33632,6 @@ fn lowerSwitchBr(
         std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
     const allocator = stack.get();
 
-    self.scope_generation += 1;
     const state = try self.saveState();
 
     const liveness = try self.liveness.getSwitchBr(allocator, inst, switch_br.cases_len + 1);
@@ -34104,8 +33692,17 @@ fn lowerSwitchBr(
             .{ .air_ref = Air.internedToRef(min.?.toIntern()) },
         );
         const else_reloc = if (switch_br.else_body_len > 0) else_reloc: {
-            try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition_index, .{ .immediate = table_len - 1 });
-            break :else_reloc try self.asmJccReloc(.a, undefined);
+            var cond_temp = try self.tempInit(condition_ty, condition_index);
+            var table_max_temp = try self.tempFromValue(try self.pt.intValue(condition_int_ty, table_len - 1));
+            const cc_temp = cond_temp.cmpInts(.gt, &table_max_temp, self) catch |err| switch (err) {
+                error.SelectFailed => unreachable,
+                else => |e| return e,
+            };
+            try cond_temp.die(self);
+            try table_max_temp.die(self);
+            const else_reloc = try self.asmJccReloc(cc_temp.tracking(self).short.eflags, undefined);
+            try cc_temp.die(self);
+            break :else_reloc else_reloc;
         } else undefined;
         const table_start: u31 = @intCast(self.mir_table.items.len);
         {
@@ -34207,7 +33804,6 @@ fn lowerSwitchBr(
         return;
     }
 
-    const signedness = if (condition_ty.isAbiInt(zcu)) condition_ty.intInfo(zcu).signedness else .unsigned;
     var cases_it = switch_br.iterateCases();
     while (cases_it.next()) |case| {
         var relocs = try allocator.alloc(Mir.Inst.Index, case.items.len + case.ranges.len);
@@ -34223,49 +33819,70 @@ fn lowerSwitchBr(
                     else => unreachable,
                 },
                 else => cc: {
-                    try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition, item_mcv);
-                    break :cc .e;
+                    var cond_temp = try self.tempInit(condition_ty, condition);
+                    var item_temp = try self.tempInit(condition_ty, item_mcv);
+                    const cc_temp = cond_temp.cmpInts(.eq, &item_temp, self) catch |err| switch (err) {
+                        error.SelectFailed => unreachable,
+                        else => |e| return e,
+                    };
+                    try cond_temp.die(self);
+                    try item_temp.die(self);
+                    const cc = cc_temp.tracking(self).short.eflags;
+                    try cc_temp.die(self);
+                    try self.resetTemps();
+                    break :cc cc;
                 },
             };
             reloc.* = try self.asmJccReloc(cc, undefined);
         }
 
         for (case.ranges, relocs[case.items.len..]) |range, *reloc| {
+            var cond_temp = try self.tempInit(condition_ty, condition);
             const min_mcv = try self.resolveInst(range[0]);
             const max_mcv = try self.resolveInst(range[1]);
             // `null` means always false.
-            const lt_min: ?Condition = switch (condition) {
+            const lt_min = cc: switch (condition) {
                 .eflags => |cc| switch (min_mcv.immediate) {
                     0 => null, // condition never <0
                     1 => cc.negate(),
                     else => unreachable,
                 },
-                else => cc: {
-                    try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition, min_mcv);
-                    break :cc switch (signedness) {
-                        .unsigned => .b,
-                        .signed => .l,
+                else => {
+                    var min_temp = try self.tempInit(condition_ty, min_mcv);
+                    const cc_temp = cond_temp.cmpInts(.lt, &min_temp, self) catch |err| switch (err) {
+                        error.SelectFailed => unreachable,
+                        else => |e| return e,
                     };
+                    try min_temp.die(self);
+                    const cc = cc_temp.tracking(self).short.eflags;
+                    try cc_temp.die(self);
+                    break :cc cc;
                 },
             };
             const lt_min_reloc = if (lt_min) |cc| r: {
                 break :r try self.asmJccReloc(cc, undefined);
             } else null;
             // `null` means always true.
-            const lte_max: ?Condition = switch (condition) {
+            const lte_max = switch (condition) {
                 .eflags => |cc| switch (max_mcv.immediate) {
                     0 => cc.negate(),
                     1 => null, // condition always >=1
                     else => unreachable,
                 },
                 else => cc: {
-                    try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition, max_mcv);
-                    break :cc switch (signedness) {
-                        .unsigned => .be,
-                        .signed => .le,
+                    var max_temp = try self.tempInit(condition_ty, max_mcv);
+                    const cc_temp = cond_temp.cmpInts(.lte, &max_temp, self) catch |err| switch (err) {
+                        error.SelectFailed => unreachable,
+                        else => |e| return e,
                     };
+                    try max_temp.die(self);
+                    const cc = cc_temp.tracking(self).short.eflags;
+                    try cc_temp.die(self);
+                    break :cc cc;
                 },
             };
+            try cond_temp.die(self);
+            try self.resetTemps();
             // "Success" case is in `reloc`....
             if (lte_max) |cc| {
                 reloc.* = try self.asmJccReloc(cc, undefined);
@@ -34350,7 +33967,6 @@ fn airLoopSwitchBr(self: *CodeGen, inst: Air.Inst.Index) !void {
     // Ensure a register is available for dispatch.
     if (!mat_cond.isRegister()) _ = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
 
-    self.scope_generation += 1;
     const state = try self.saveState();
 
     try self.loops.putNoClobber(self.gpa, inst, .{
@@ -34403,6 +34019,8 @@ fn airSwitchDispatch(self: *CodeGen, inst: Air.Inst.Index) !void {
 
     if (self.loop_switches.getPtr(br.block_inst)) |table| {
         const condition_ty = self.typeOf(br.operand);
+        const condition_int_info = self.intInfo(condition_ty).?;
+        const condition_int_ty = try self.pt.intType(condition_int_info.signedness, condition_int_info.bits);
         const condition_mcv = block_tracking.short;
         try self.spillEflagsIfOccupied();
         if (table.min.orderAgainstZero(self.pt.zcu).compare(.neq)) try self.genBinOpMir(
@@ -34414,12 +34032,28 @@ fn airSwitchDispatch(self: *CodeGen, inst: Air.Inst.Index) !void {
         switch (table.else_relocs) {
             .@"unreachable" => {},
             .forward => |*else_relocs| {
-                try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition_mcv, .{ .immediate = table.len - 1 });
-                try else_relocs.append(self.gpa, try self.asmJccReloc(.a, undefined));
+                var cond_temp = try self.tempInit(condition_ty, condition_mcv);
+                var table_max_temp = try self.tempFromValue(try self.pt.intValue(condition_int_ty, table.len - 1));
+                const cc_temp = cond_temp.cmpInts(.gt, &table_max_temp, self) catch |err| switch (err) {
+                    error.SelectFailed => unreachable,
+                    else => |e| return e,
+                };
+                try cond_temp.die(self);
+                try table_max_temp.die(self);
+                try else_relocs.append(self.gpa, try self.asmJccReloc(cc_temp.tracking(self).short.eflags, undefined));
+                try cc_temp.die(self);
             },
             .backward => |else_reloc| {
-                try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition_mcv, .{ .immediate = table.len - 1 });
-                _ = try self.asmJccReloc(.a, else_reloc);
+                var cond_temp = try self.tempInit(condition_ty, condition_mcv);
+                var table_max_temp = try self.tempFromValue(try self.pt.intValue(condition_int_ty, table.len - 1));
+                const cc_temp = cond_temp.cmpInts(.gt, &table_max_temp, self) catch |err| switch (err) {
+                    error.SelectFailed => unreachable,
+                    else => |e| return e,
+                };
+                try cond_temp.die(self);
+                try table_max_temp.die(self);
+                _ = try self.asmJccReloc(cc_temp.tracking(self).short.eflags, else_reloc);
+                try cc_temp.die(self);
             },
         }
         {
@@ -35754,21 +35388,21 @@ fn genSetReg(
         => unreachable,
         .undef => if (opts.safety) switch (dst_reg.class()) {
             .general_purpose => switch (abi_size) {
-                1 => try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to8(), .u(0xAA)),
-                2 => try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to16(), .u(0xAAAA)),
+                1 => try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to8(), .u(0xaa)),
+                2 => try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to16(), .u(0xaaaa)),
                 3...4 => try self.asmRegisterImmediate(
                     .{ ._, .mov },
                     dst_reg.to32(),
-                    .s(@as(i32, @bitCast(@as(u32, 0xAAAAAAAA)))),
+                    .s(@as(i32, @bitCast(@as(u32, 0xaaaaaaaa)))),
                 ),
                 5...8 => try self.asmRegisterImmediate(
                     .{ ._, .mov },
                     dst_reg.to64(),
-                    .u(0xAAAAAAAAAAAAAAAA),
+                    .u(0xaaaaaaaaaaaaaaaa),
                 ),
                 else => unreachable,
             },
-            .segment, .x87, .mmx, .sse => {
+            .segment, .mmx, .sse => {
                 const full_ty = try pt.vectorType(.{
                     .len = self.vectorSize(.float),
                     .child = .u8_type,
@@ -35780,6 +35414,9 @@ fn genSetReg(
                     } })),
                 ), opts);
             },
+            .x87 => try self.genSetReg(dst_reg, .f80, try self.genTypedValue(
+                try pt.floatValue(.f80, @as(f80, @bitCast(@as(u80, 0xaaaaaaaaaaaaaaaaaaaa)))),
+            ), opts),
             .ip, .cr, .dr => unreachable,
         },
         .eflags => |cc| try self.asmSetccRegister(cc, dst_reg.to8()),
@@ -40161,8 +39798,16 @@ fn resolveCallingConventionValues(
     const zcu = pt.zcu;
     const ip = &zcu.intern_pool;
     const cc = fn_info.cc;
-    const param_types = try self.gpa.alloc(Type, fn_info.param_types.len + var_args.len);
-    defer self.gpa.free(param_types);
+
+    const ExpectedContents = extern struct {
+        param_types: [32][@sizeOf(Type)]u8 align(@alignOf(Type)),
+    };
+    var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
+        std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
+    const allocator = stack.get();
+
+    const param_types = try allocator.alloc(Type, fn_info.param_types.len + var_args.len);
+    defer allocator.free(param_types);
 
     for (param_types[0..fn_info.param_types.len], fn_info.param_types.get(ip)) |*dest, src|
         dest.* = .fromInterned(src);
@@ -40735,6 +40380,11 @@ fn hasFeature(cg: *CodeGen, feature: std.Target.x86.Feature) bool {
             .ReleaseSmall => true,
         },
         .mmx => false,
+        .sahf => switch (cg.target.cpu.arch) {
+            else => unreachable,
+            .x86 => true,
+            .x86_64 => null,
+        },
         else => null,
     } orelse std.Target.x86.featureSetHas(cg.target.cpu.features, feature);
 }
@@ -40746,8 +40396,7 @@ fn typeOf(self: *CodeGen, inst: Air.Inst.Ref) Type {
 }
 
 fn typeOfIndex(self: *CodeGen, inst: Air.Inst.Index) Type {
-    const temp: Temp = .{ .index = inst };
-    return temp.typeOf(self);
+    return Temp.typeOf(.{ .index = inst }, self); // wraps the index in a Temp literal and defers to Temp.typeOf
 }
 
 fn intCompilerRtAbiName(int_bits: u32) u8 {
@@ -40777,980 +40426,1922 @@ fn floatCompilerRtAbiType(self: *CodeGen, ty: Type, other_ty: Type) Type {
     return ty;
 }
 
-fn floatLibcAbiPrefix(ty: Type) []const u8 {
-    return switch (ty.toIntern()) {
-        .f16_type, .f80_type => "__",
-        .f32_type, .f64_type, .f128_type, .c_longdouble_type => "",
-        else => unreachable,
-    };
-}
+fn floatLibcAbiPrefix(ty: Type) []const u8 { // NOTE(review): presumably the prefix of a libc symbol name for this float type; confirm against callers
+    return switch (ty.toIntern()) {
+        .f16_type, .f80_type => "__", // f16 and f80 are the only types given a double-underscore prefix
+        .f32_type, .f64_type, .f128_type, .c_longdouble_type => "", // these types get an empty prefix
+        else => unreachable, // no other type is handled
+    };
+}
+
+fn floatLibcAbiSuffix(ty: Type) []const u8 { // NOTE(review): presumably the suffix of a libc symbol name for this float type; confirm against callers
+    return switch (ty.toIntern()) {
+        .f16_type => "h",
+        .f32_type => "f",
+        .f64_type => "", // f64 is the only type with an empty suffix
+        .f80_type => "x",
+        .f128_type => "q",
+        .c_longdouble_type => "l",
+        else => unreachable, // no other type is handled
+    };
+}
+
+fn promoteInt(self: *CodeGen, ty: Type) Type { // returns the first C integer type listed below that can hold every value of ty, else ty itself
+    const pt = self.pt;
+    const zcu = pt.zcu;
+    const int_info: InternPool.Key.IntType = switch (ty.toIntern()) {
+        .bool_type => .{ .signedness = .unsigned, .bits = 1 }, // bool is treated as a 1-bit unsigned integer
+        else => if (ty.isAbiInt(zcu)) ty.intInfo(zcu) else return ty, // types that are not ABI integers are returned unchanged
+    };
+    for ([_]Type{ // candidates are tried in this order and the first one that fits is returned
+        .c_int,      .c_uint,
+        .c_long,     .c_ulong,
+        .c_longlong, .c_ulonglong,
+    }) |promote_ty| {
+        const promote_info = promote_ty.intInfo(zcu);
+        if (int_info.signedness == .signed and promote_info.signedness == .unsigned) continue; // a signed source never promotes to an unsigned candidate
+        if (int_info.bits + @intFromBool(int_info.signedness == .unsigned and
+            promote_info.signedness == .signed) <= promote_info.bits) return promote_ty; // an unsigned source needs one extra bit to fit a signed candidate
+    }
+    return ty; // no candidate is wide enough
+}
+
+fn promoteVarArg(self: *CodeGen, ty: Type) Type { // NOTE(review): presumably the type a variadic argument of type ty is passed as; confirm against callers
+    if (!ty.isRuntimeFloat()) return self.promoteInt(ty); // non-floats follow the integer promotion in promoteInt
+    switch (ty.floatBits(self.target.*)) {
+        32, 64 => return .f64, // 32- and 64-bit floats both become f64
+        else => |float_bits| {
+            assert(float_bits == self.target.cTypeBitSize(.longdouble)); // any other float width must match the target's long double width
+            return .c_longdouble;
+        },
+    }
+}
+
+fn unalignedSize(cg: *CodeGen, ty: Type) u64 { // ABI size of ty, except that vectors are sized per element
+    const zcu = cg.pt.zcu;
+    return switch (zcu.intern_pool.indexToKey(ty.toIntern())) {
+        .vector_type => |vector_type| Type.fromInterned(vector_type.child).abiSize(zcu) * vector_type.len, // element ABI size times element count
+        else => ty.abiSize(zcu), // every other type uses its ABI size directly
+    };
+}
+
+fn intInfo(cg: *CodeGen, ty: Type) ?std.builtin.Type.Int { // integer signedness and bit width that ty maps to, or null if no case below yields one
+    const zcu = cg.pt.zcu;
+    const ip = &zcu.intern_pool;
+    var ty_index = ty.ip_index; // replaced by a packed struct's backing int or an enum's tag type as the loop continues
+    while (true) switch (ip.indexToKey(ty_index)) { // every case returns except packed structs and enums, which retry with the new ty_index
+        .int_type => |int_type| return int_type,
+        .ptr_type => |ptr_type| return switch (ptr_type.flags.size) {
+            .one, .many, .c => .{ .signedness = .unsigned, .bits = cg.target.ptrBitWidth() }, // non-slice pointers are pointer-width unsigned
+            .slice => null,
+        },
+        .opt_type => |opt_child| return if (!Type.fromInterned(opt_child).hasRuntimeBitsIgnoreComptime(zcu))
+            .{ .signedness = .unsigned, .bits = 1 } // payload has no runtime bits, so the optional is reported as 1-bit unsigned
+        else switch (ip.indexToKey(opt_child)) {
+            .ptr_type => |ptr_type| switch (ptr_type.flags.size) {
+                .one, .many => switch (ptr_type.flags.is_allowzero) {
+                    false => .{ .signedness = .unsigned, .bits = cg.target.ptrBitWidth() }, // optional of a non-allowzero one/many pointer is pointer-width unsigned
+                    true => null,
+                },
+                .slice, .c => null,
+            },
+            else => null, // any other optional with a runtime payload is not an integer
+        },
+        .error_union_type => |error_union_type| return if (!Type.fromInterned(error_union_type.payload_type)
+            .hasRuntimeBitsIgnoreComptime(zcu)) .{ .signedness = .unsigned, .bits = zcu.errorSetBits() } else null, // only error unions whose payload has no runtime bits qualify
+        .simple_type => |simple_type| return switch (simple_type) {
+            .bool => .{ .signedness = .unsigned, .bits = 1 },
+            .anyerror => .{ .signedness = .unsigned, .bits = zcu.errorSetBits() },
+            .isize => .{ .signedness = .signed, .bits = cg.target.ptrBitWidth() },
+            .usize => .{ .signedness = .unsigned, .bits = cg.target.ptrBitWidth() },
+            .c_char => .{ .signedness = cg.target.charSignedness(), .bits = cg.target.cTypeBitSize(.char) }, // signedness of c_char is taken from the target
+            .c_short => .{ .signedness = .signed, .bits = cg.target.cTypeBitSize(.short) },
+            .c_ushort => .{ .signedness = .unsigned, .bits = cg.target.cTypeBitSize(.short) },
+            .c_int => .{ .signedness = .signed, .bits = cg.target.cTypeBitSize(.int) },
+            .c_uint => .{ .signedness = .unsigned, .bits = cg.target.cTypeBitSize(.int) },
+            .c_long => .{ .signedness = .signed, .bits = cg.target.cTypeBitSize(.long) },
+            .c_ulong => .{ .signedness = .unsigned, .bits = cg.target.cTypeBitSize(.long) },
+            .c_longlong => .{ .signedness = .signed, .bits = cg.target.cTypeBitSize(.longlong) },
+            .c_ulonglong => .{ .signedness = .unsigned, .bits = cg.target.cTypeBitSize(.longlong) },
+            .f16, .f32, .f64, .f80, .f128, .c_longdouble => null, // floats are never reported as integers
+            .anyopaque,
+            .void,
+            .type,
+            .comptime_int,
+            .comptime_float,
+            .noreturn,
+            .null,
+            .undefined,
+            .enum_literal,
+            .adhoc_inferred_error_set,
+            .generic_poison,
+            => unreachable, // NOTE(review): presumably these simple types never reach this function; confirm
+        },
+        .struct_type => {
+            const loaded_struct = ip.loadStructType(ty_index);
+            switch (loaded_struct.layout) {
+                .auto, .@"extern" => return null,
+                .@"packed" => ty_index = loaded_struct.backingIntTypeUnordered(ip), // loop again on the packed struct's backing integer type
+            }
+        },
+        .union_type => return switch (ip.loadUnionType(ty_index).flagsUnordered(ip).layout) {
+            .auto, .@"extern" => null,
+            .@"packed" => .{ .signedness = .unsigned, .bits = @intCast(ty.bitSize(zcu)) }, // NOTE(review): sizes the original ty, not ty_index; confirm they always name the same type here
+        },
+        .enum_type => ty_index = ip.loadEnumType(ty_index).tag_ty, // loop again on the enum's tag type
+        .error_set_type, .inferred_error_set_type => return .{ .signedness = .unsigned, .bits = zcu.errorSetBits() },
+        else => return null, // all remaining type kinds are not integers
+    };
+}
+
+fn floatBits(cg: *CodeGen, ty: Type) ?u16 {
+    return if (ty.isRuntimeFloat()) ty.floatBits(cg.target.*) else null; // bit width on the target for runtime floats, null for every other type
+}
+
+const Temp = struct {
+    index: Air.Inst.Index,
+
+    fn unwrap(temp: Temp, cg: *CodeGen) union(enum) { // classifies this Temp as an AIR ref, a codegen temp index, or the error return trace
+        ref: Air.Inst.Ref,
+        temp: Index,
+        err_ret_trace,
+    } {
+        switch (temp.index.unwrap()) {
+            .ref => |ref| return .{ .ref = ref },
+            .target => |target_index| {
+                if (temp.index == err_ret_trace_index) return .err_ret_trace; // the reserved index is checked before treating the value as a temp
+                const temp_index: Index = @enumFromInt(target_index);
+                assert(temp_index.isValid(cg)); // any other target index must pass Index.isValid
+                return .{ .temp = temp_index };
+            },
+        }
+    }
+
+    fn typeOf(temp: Temp, cg: *CodeGen) Type { // type of the value this Temp stands for, chosen by what unwrap reports
+        return switch (temp.unwrap(cg)) {
+            .ref => switch (cg.air.instructions.items(.tag)[@intFromEnum(temp.index)]) {
+                .loop_switch_br => cg.typeOf(cg.air.unwrapSwitch(temp.index).operand), // typed as the switch operand instead of via air.typeOfIndex
+                else => cg.air.typeOfIndex(temp.index, &cg.pt.zcu.intern_pool),
+            },
+            .temp => |temp_index| temp_index.typeOf(cg),
+            .err_ret_trace => .usize, // the error return trace is always typed as usize
+        };
+    }
+
+    fn isMut(temp: Temp, cg: *CodeGen) bool { // true only for codegen temps whose short value is one of the register forms or an unnamed frame slot
+        return switch (temp.unwrap(cg)) {
+            .ref, .err_ret_trace => false, // only codegen temps can report true
+            .temp => |temp_index| switch (temp_index.tracking(cg).short) {
+                .none,
+                .unreach,
+                .dead,
+                .undef,
+                .immediate,
+                .eflags,
+                .register_offset,
+                .register_mask,
+                .memory,
+                .load_symbol,
+                .lea_symbol,
+                .indirect,
+                .load_direct,
+                .lea_direct,
+                .load_got,
+                .lea_got,
+                .load_tlv,
+                .lea_tlv,
+                .lea_frame,
+                .elementwise_regs_then_frame,
+                .reserved_frame,
+                .air_ref,
+                => false,
+                .register,
+                .register_pair,
+                .register_triple,
+                .register_quadruple,
+                .register_overflow,
+                => true, // plain registers, register tuples, and register_overflow report true
+                .load_frame => |frame_addr| !frame_addr.index.isNamed(), // frame slots report true only when the frame index is not a named one
+            },
+        };
+    }
+
+    fn tracking(temp: Temp, cg: *CodeGen) InstTracking {
+        return cg.inst_tracking.get(temp.index).?; // returns the entry by value; the unwrap requires that an entry exists for this index
+    }
+
+    fn getOffset(temp: Temp, off: i32, cg: *CodeGen) !Temp { // creates a new usize temp tracking this temp's value displaced by off
+        const new_temp_index = cg.next_temp_index; // claim the next temp slot for the result
+        cg.temp_type[@intFromEnum(new_temp_index)] = .usize;
+        cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
+        switch (temp.tracking(cg).short) {
+            else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), // value kinds without a case below are not supported and panic
+            .register => |reg| {
+                const new_reg =
+                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
+                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
+                try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ // lea computes reg + off into the new register
+                    .base = .{ .reg = reg.to64() },
+                    .mod = .{ .rm = .{
+                        .size = .qword,
+                        .disp = off,
+                    } },
+                });
+            },
+            .register_offset => |reg_off| {
+                const new_reg =
+                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
+                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
+                try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{
+                    .base = .{ .reg = reg_off.reg.to64() },
+                    .mod = .{ .rm = .{
+                        .size = .qword,
+                        .disp = reg_off.off + off, // the existing offset and the requested one are folded into a single displacement
+                    } },
+                });
+            },
+            .lea_symbol => |sym_off| new_temp_index.tracking(cg).* = .init(.{ .lea_symbol = .{ // no instruction is emitted; only the tracked offset changes
+                .sym_index = sym_off.sym_index,
+                .off = sym_off.off + off,
+            } }),
+            .load_frame => |frame_addr| {
+                const new_reg =
+                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
+                new_temp_index.tracking(cg).* = .init(.{ .register_offset = .{ // off is recorded in the tracking rather than added by an instruction
+                    .reg = new_reg,
+                    .off = off,
+                } });
+                try cg.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{ // load the qword stored in the frame slot into the new register
+                    .base = .{ .frame = frame_addr.index },
+                    .mod = .{ .rm = .{
+                        .size = .qword,
+                        .disp = frame_addr.off,
+                    } },
+                });
+            },
+            .lea_frame => |frame_addr| new_temp_index.tracking(cg).* = .init(.{ .lea_frame = .{ // no instruction is emitted; only the tracked offset changes
+                .index = frame_addr.index,
+                .off = frame_addr.off + off,
+            } }),
+        }
+        return .{ .index = new_temp_index.toIndex() };
+    }
+
+    /// Displaces the pointer value held by `temp` by `off` bytes, updating `temp`
+    /// in place. Does nothing when `off` is zero.
+    ///
+    /// When `temp` unwraps to an owned `.temp` whose `short` is `.register`,
+    /// `.register_offset`, `.lea_symbol` or `.lea_frame`, only its tracking is
+    /// rewritten and the function returns early. In every other case (`.ref`,
+    /// `.err_ret_trace`, or another `short` form) a displaced copy is created
+    /// with `getOffset`, the old temp is killed, and `temp.*` is replaced.
+    fn toOffset(temp: *Temp, off: i32, cg: *CodeGen) !void {
+        if (off == 0) return;
+        switch (temp.unwrap(cg)) {
+            // Not an owned temp: fall through to the copying path after the switch.
+            .ref, .err_ret_trace => {},
+            .temp => |temp_index| {
+                const temp_tracking = temp_index.tracking(cg);
+                switch (temp_tracking.short) {
+                    else => {},
+                    .register => |reg| {
+                        // NOTE(review): `long` is passed to `freeValue` before the tracking is
+                        // re-initialized; presumably this releases a stale long-term home - confirm.
+                        try cg.freeValue(temp_tracking.long);
+                        temp_tracking.* = .init(.{ .register_offset = .{
+                            .reg = reg,
+                            .off = off,
+                        } });
+                        return;
+                    },
+                    .register_offset => |reg_off| {
+                        try cg.freeValue(temp_tracking.long);
+                        temp_tracking.* = .init(.{ .register_offset = .{
+                            .reg = reg_off.reg,
+                            .off = reg_off.off + off,
+                        } });
+                        return;
+                    },
+                    .lea_symbol => |sym_off| {
+                        // Expects `long` to hold the same `lea_symbol` payload as `short`.
+                        assert(std.meta.eql(temp_tracking.long.lea_symbol, sym_off));
+                        temp_tracking.* = .init(.{ .lea_symbol = .{
+                            .sym_index = sym_off.sym_index,
+                            .off = sym_off.off + off,
+                        } });
+                        return;
+                    },
+                    .lea_frame => |frame_addr| {
+                        // Expects `long` to hold the same `lea_frame` payload as `short`.
+                        assert(std.meta.eql(temp_tracking.long.lea_frame, frame_addr));
+                        temp_tracking.* = .init(.{ .lea_frame = .{
+                            .index = frame_addr.index,
+                            .off = frame_addr.off + off,
+                        } });
+                        return;
+                    },
+                }
+            },
+        }
+        // Copying path: build the displaced value in a new temp, then retire the old one.
+        const new_temp = try temp.getOffset(off, cg);
+        try temp.die(cg);
+        temp.* = new_temp;
+    }
+
+    /// Creates a new temp of type `limb_ty` holding limb number `limb_index` of
+    /// this temp's value, and returns it. For values read from memory
+    /// (`.load_symbol`, `.load_frame`) the limb is the qword at displacement
+    /// `limb_index * 8`; for `.register_pair` it is the register at that index.
+    ///
+    /// Forms that hold a single value (`.immediate`, `.register`,
+    /// `.register_offset`, `.lea_symbol`, `.lea_frame`) assert `limb_index == 0`.
+    /// Panics on any `short` form not handled by the switch.
+    fn getLimb(temp: Temp, limb_ty: Type, limb_index: u28, cg: *CodeGen) !Temp {
+        const new_temp_index = cg.next_temp_index;
+        cg.temp_type[@intFromEnum(new_temp_index)] = limb_ty;
+        switch (temp.tracking(cg).short) {
+            else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
+            .immediate => |imm| {
+                assert(limb_index == 0);
+                new_temp_index.tracking(cg).* = .init(.{ .immediate = imm });
+            },
+            .register => |reg| {
+                assert(limb_index == 0);
+                const new_reg =
+                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
+                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
+                try cg.asmRegisterRegister(.{ ._, .mov }, new_reg.to64(), reg.to64());
+            },
+            // Copy the selected register of the pair into a fresh register.
+            .register_pair => |regs| {
+                const new_reg =
+                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
+                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
+                try cg.asmRegisterRegister(.{ ._, .mov }, new_reg.to64(), regs[limb_index].to64());
+            },
+            // Materialize reg + off with lea; the limb term is zero given the assert.
+            .register_offset => |reg_off| {
+                assert(limb_index == 0);
+                const new_reg =
+                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
+                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
+                try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{
+                    .base = .{ .reg = reg_off.reg.to64() },
+                    .mod = .{ .rm = .{
+                        .size = .qword,
+                        .disp = reg_off.off + @as(u31, limb_index) * 8,
+                    } },
+                });
+            },
+            // Load the qword limb from the symbol's memory.
+            .load_symbol => |sym_off| {
+                const new_reg =
+                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
+                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
+                try cg.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{
+                    .base = .{ .reloc = sym_off.sym_index },
+                    .mod = .{ .rm = .{
+                        .size = .qword,
+                        .disp = sym_off.off + @as(u31, limb_index) * 8,
+                    } },
+                });
+            },
+            .lea_symbol => |sym_off| {
+                assert(limb_index == 0);
+                new_temp_index.tracking(cg).* = .init(.{ .lea_symbol = sym_off });
+            },
+            // Load the qword limb from the frame slot.
+            .load_frame => |frame_addr| {
+                const new_reg =
+                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
+                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
+                try cg.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{
+                    .base = .{ .frame = frame_addr.index },
+                    .mod = .{ .rm = .{
+                        .size = .qword,
+                        .disp = frame_addr.off + @as(u31, limb_index) * 8,
+                    } },
+                });
+            },
+            .lea_frame => |frame_addr| {
+                assert(limb_index == 0);
+                new_temp_index.tracking(cg).* = .init(.{ .lea_frame = frame_addr });
+            },
+        }
+        // The new temp index is only claimed once the switch has succeeded.
+        cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
+        return .{ .index = new_temp_index.toIndex() };
+    }
+
+    /// Narrows `temp` in place to limb number `limb_index` of its value and
+    /// retypes it as `limb_ty`.
+    ///
+    /// For an owned `.temp` in one of the forms handled below, the existing
+    /// tracking is rewritten without emitting code and the function returns
+    /// early. Otherwise (`.ref`, another `short` form, or a `.load_frame` whose
+    /// frame index is named) the limb is copied out with `getLimb`, the old temp
+    /// is killed, and `temp.*` is replaced. `.err_ret_trace` is not expected here.
+    fn toLimb(temp: *Temp, limb_ty: Type, limb_index: u28, cg: *CodeGen) !void {
+        switch (temp.unwrap(cg)) {
+            // Not an owned temp: fall through to the copying path after the switch.
+            .ref => {},
+            .temp => |temp_index| {
+                const temp_tracking = temp_index.tracking(cg);
+                switch (temp_tracking.short) {
+                    else => {},
+                    // Single-value forms: only the recorded type changes.
+                    .register, .lea_symbol, .lea_frame => {
+                        assert(limb_index == 0);
+                        cg.temp_type[@intFromEnum(temp_index)] = limb_ty;
+                        return;
+                    },
+                    .register_pair => |regs| {
+                        // NOTE(review): `long` is adjusted here, but the whole tracking is
+                        // replaced by `.init(...)` a few lines below - confirm whether the
+                        // adjusted `long` is meant to survive.
+                        switch (temp_tracking.long) {
+                            .none, .reserved_frame => {},
+                            else => temp_tracking.long =
+                                temp_tracking.long.address().offset(@as(u31, limb_index) * 8).deref(),
+                        }
+                        // Release every register of the pair except the selected limb.
+                        for (regs, 0..) |reg, reg_index| if (reg_index != limb_index)
+                            cg.register_manager.freeReg(reg);
+                        temp_tracking.* = .init(.{ .register = regs[limb_index] });
+                        cg.temp_type[@intFromEnum(temp_index)] = limb_ty;
+                        return;
+                    },
+                    // Memory forms: shift the tracked offset by limb_index * 8 bytes.
+                    .load_symbol => |sym_off| {
+                        assert(std.meta.eql(temp_tracking.long.load_symbol, sym_off));
+                        temp_tracking.* = .init(.{ .load_symbol = .{
+                            .sym_index = sym_off.sym_index,
+                            .off = sym_off.off + @as(u31, limb_index) * 8,
+                        } });
+                        cg.temp_type[@intFromEnum(temp_index)] = limb_ty;
+                        return;
+                    },
+                    // Only unnamed frame slots are rewritten in place; named ones take
+                    // the copying path below.
+                    .load_frame => |frame_addr| if (!frame_addr.index.isNamed()) {
+                        assert(std.meta.eql(temp_tracking.long.load_frame, frame_addr));
+                        temp_tracking.* = .init(.{ .load_frame = .{
+                            .index = frame_addr.index,
+                            .off = frame_addr.off + @as(u31, limb_index) * 8,
+                        } });
+                        cg.temp_type[@intFromEnum(temp_index)] = limb_ty;
+                        return;
+                    },
+                }
+            },
+            .err_ret_trace => unreachable,
+        }
+        // Copying path: extract the limb into a new temp, then retire the old one.
+        const new_temp = try temp.getLimb(limb_ty, limb_index, cg);
+        try temp.die(cg);
+        temp.* = new_temp;
+    }
+
+    /// If `temp` has a slice type, narrows it in place to limb 0, typed as the
+    /// slice's pointer field type. Temps of any other type are left untouched.
+    fn toSlicePtr(temp: *Temp, cg: *CodeGen) !void {
+        const slice_ty = temp.typeOf(cg);
+        if (!slice_ty.isSlice(cg.pt.zcu)) return;
+        const ptr_field_ty = slice_ty.slicePtrFieldType(cg.pt.zcu);
+        try temp.toLimb(ptr_field_ty, 0, cg);
+    }
+
+    /// Narrows `temp` in place to limb 1, typed as `usize`. No check is made
+    /// here that `temp` actually has a slice type.
+    fn toSliceLen(temp: *Temp, cg: *CodeGen) !void {
+        const len_limb_index = 1;
+        try temp.toLimb(.usize, len_limb_index, cg);
+    }
+
+    /// Moves the value of `temp` into the specific register `new_reg`, replacing
+    /// `temp` with a new temp tracked as `.register = new_reg`.
+    ///
+    /// Returns `false` without emitting anything when `temp` is an owned temp
+    /// that is already tracked in exactly `new_reg`; returns `true` after a copy.
+    /// `.ref` and `.err_ret_trace` temps are always copied.
+    fn toReg(temp: *Temp, new_reg: Register, cg: *CodeGen) !bool {
+        // Determine the source value and its type; `.err_ret_trace` is typed as usize.
+        const val, const ty: Type = val_ty: switch (temp.unwrap(cg)) {
+            .ref => |ref| .{ temp.tracking(cg).short, cg.typeOf(ref) },
+            .temp => |temp_index| {
+                const temp_tracking = temp_index.tracking(cg);
+                if (temp_tracking.short == .register and
+                    temp_tracking.short.register == new_reg) return false;
+                break :val_ty .{ temp_tracking.short, temp_index.typeOf(cg) };
+            },
+            .err_ret_trace => .{ temp.tracking(cg).short, .usize },
+        };
+        const new_temp_index = cg.next_temp_index;
+        // Claim `new_reg` for the new temp (presumably evicting any current occupant -
+        // confirm against `getReg`), then copy the value into it.
+        try cg.register_manager.getReg(new_reg, new_temp_index.toIndex());
+        cg.temp_type[@intFromEnum(new_temp_index)] = ty;
+        try cg.genSetReg(new_reg, ty, val, .{});
+        new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
+        // The old temp is only killed after its value has been copied.
+        try temp.die(cg);
+        cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
+        temp.* = .{ .index = new_temp_index.toIndex() };
+        return true;
+    }
+
+    /// Ensures `temp` lives in a single register of class `rc`, replacing it with
+    /// a new temp when it does not.
+    ///
+    /// Returns `false` when nothing had to be done: `temp` is already a
+    /// `.register` of class `rc`, or a `.register_offset` of class `rc` with a zero
+    /// offset, and either `mut` is false or `temp.isMut(cg)` holds. Returns `true`
+    /// after copying the value into a newly allocated register of class `rc`.
+    fn toRegClass(temp: *Temp, mut: bool, rc: Register.Class, cg: *CodeGen) !bool {
+        const val = temp.tracking(cg).short;
+        // When a mutable register was requested but `temp` is not mutable, the
+        // early-out is skipped and a copy is always made.
+        if (!mut or temp.isMut(cg)) switch (val) {
+            else => {},
+            .register => |reg| if (reg.class() == rc) return false,
+            .register_offset => |reg_off| if (reg_off.reg.class() == rc and reg_off.off == 0) return false,
+        };
+        const ty = temp.typeOf(cg);
+        const new_temp_index = cg.next_temp_index;
+        cg.temp_type[@intFromEnum(new_temp_index)] = ty;
+        const new_reg = try cg.register_manager.allocReg(new_temp_index.toIndex(), regSetForRegClass(rc));
+        try cg.genSetReg(new_reg, ty, val, .{});
+        new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
+        // The old temp is only killed after its value has been copied.
+        try temp.die(cg);
+        cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
+        temp.* = .{ .index = new_temp_index.toIndex() };
+        return true;
+    }
+
+    /// Merges two temps into one temp tracked as a `register_pair` made of
+    /// `first_temp`'s register followed by `second_temp`'s register. On return
+    /// both `first_temp.*` and `second_temp.*` refer to the merged temp, which is
+    /// typed as `.slice_const_u8`.
+    ///
+    /// Both inputs must end up as owned temps (the `.temp` payload is accessed
+    /// directly), and both register hand-overs via `reuseTemp` are asserted to
+    /// succeed.
+    fn toPair(first_temp: *Temp, second_temp: *Temp, cg: *CodeGen) !void {
+        // Repeat until a full pass over both temps moves neither of them: the inner
+        // `break` restarts the pass, the `else break` leaves the outer loop.
+        while (true) for ([_]*Temp{ first_temp, second_temp }) |part_temp| {
+            if (try part_temp.toRegClass(true, .general_purpose, cg)) break;
+        } else break;
+        const first_temp_tracking = first_temp.unwrap(cg).temp.tracking(cg);
+        const second_temp_tracking = second_temp.unwrap(cg).temp.tracking(cg);
+        const result: MCValue = .{ .register_pair = .{
+            first_temp_tracking.short.register,
+            second_temp_tracking.short.register,
+        } };
+        const result_temp_index = cg.next_temp_index;
+        const result_temp: Temp = .{ .index = result_temp_index.toIndex() };
+        // Hand each input's register over to the result temp.
+        assert(cg.reuseTemp(result_temp.index, first_temp.index, first_temp_tracking));
+        assert(cg.reuseTemp(result_temp.index, second_temp.index, second_temp_tracking));
+        cg.temp_type[@intFromEnum(result_temp_index)] = .slice_const_u8;
+        result_temp_index.tracking(cg).* = .init(result);
+        // NOTE(review): `cg.next_temp_index` is not advanced in this function;
+        // presumably `reuseTemp` does so - confirm.
+        first_temp.* = result_temp;
+        second_temp.* = result_temp;
+    }
+
+    /// Retags an owned temp that is currently tracked in a single register as a
+    /// `register_mask` over that same register, carrying `info`. Only the `short`
+    /// tracking is changed. Asserts that `info.scalar` is not `.none`.
+    fn asMask(temp: Temp, info: MaskInfo, cg: *CodeGen) void {
+        assert(info.scalar != .none);
+        const mcv = &temp.unwrap(cg).temp.tracking(cg).short;
+        // Copy the register out first: the union behind `mcv` is overwritten next.
+        const reg = mcv.register;
+        mcv.* = .{ .register_mask = .{ .reg = reg, .info = info } };
+    }
 
-fn floatLibcAbiSuffix(ty: Type) []const u8 {
-    return switch (ty.toIntern()) {
-        .f16_type => "h",
-        .f32_type => "f",
-        .f64_type => "",
-        .f80_type => "x",
-        .f128_type => "q",
-        .c_longdouble_type => "l",
-        else => unreachable,
-    };
-}
+    /// Performs one normalization step on a pointer-valued temp so that its
+    /// `short` tracking is itself an address form rather than a value that must
+    /// first be loaded. Returns `true` when `temp` was changed and `false` when
+    /// it was already acceptable; the callers in this file loop until `false`.
+    ///
+    /// - Forms that cannot represent a pointer are unreachable.
+    /// - Immediates, registers, register offsets and most `lea_*` forms are
+    ///   accepted as they are.
+    /// - Pointers that live in memory are loaded into a general purpose register.
+    /// - A `lea_symbol` with a non-zero offset is rebuilt as register plus offset.
+    fn toLea(temp: *Temp, cg: *CodeGen) !bool {
+        switch (temp.tracking(cg).short) {
+            .none,
+            .unreach,
+            .dead,
+            .undef,
+            .eflags,
+            .register_pair,
+            .register_triple,
+            .register_quadruple,
+            .register_overflow,
+            .register_mask,
+            .elementwise_regs_then_frame,
+            .reserved_frame,
+            .air_ref,
+            => unreachable, // not a valid pointer
+            .immediate,
+            .register,
+            .register_offset,
+            .lea_direct,
+            .lea_got,
+            .lea_tlv,
+            .lea_frame,
+            => return false,
+            .memory,
+            .indirect,
+            .load_symbol,
+            .load_direct,
+            .load_got,
+            .load_tlv,
+            .load_frame,
+            => return temp.toRegClass(true, .general_purpose, cg),
+            .lea_symbol => |sym_off| {
+                const off = sym_off.off;
+                if (off == 0) return false;
+                // Strip the offset, move the bare symbol address into a register,
+                // then re-apply the offset on top of that register.
+                try temp.toOffset(-off, cg);
+                while (try temp.toRegClass(true, .general_purpose, cg)) {}
+                try temp.toOffset(off, cg);
+                return true;
+            },
+        }
+    }
 
-fn promoteInt(self: *CodeGen, ty: Type) Type {
-    const pt = self.pt;
-    const zcu = pt.zcu;
-    const int_info: InternPool.Key.IntType = switch (ty.toIntern()) {
-        .bool_type => .{ .signedness = .unsigned, .bits = 1 },
-        else => if (ty.isAbiInt(zcu)) ty.intInfo(zcu) else return ty,
-    };
-    for ([_]Type{
-        .c_int,      .c_uint,
-        .c_long,     .c_ulong,
-        .c_longlong, .c_ulonglong,
-    }) |promote_ty| {
-        const promote_info = promote_ty.intInfo(zcu);
-        if (int_info.signedness == .signed and promote_info.signedness == .unsigned) continue;
-        if (int_info.bits + @intFromBool(int_info.signedness == .unsigned and
-            promote_info.signedness == .signed) <= promote_info.bits) return promote_ty;
+    /// Ensures the value of `temp` resides in memory. Returns `false` when
+    /// `short.isMemory()` already holds. Otherwise a spill frame slot sized for
+    /// the temp's type is allocated, the value is stored into it, `temp` is
+    /// replaced by a new temp tracked as `load_frame` of that slot, and `true`
+    /// is returned.
+    fn toMemory(temp: *Temp, cg: *CodeGen) !bool {
+        const temp_tracking = temp.tracking(cg);
+        if (temp_tracking.short.isMemory()) return false;
+        const new_temp_index = cg.next_temp_index;
+        const ty = temp.typeOf(cg);
+        cg.temp_type[@intFromEnum(new_temp_index)] = ty;
+        const new_frame_index = try cg.allocFrameIndex(.initSpill(ty, cg.pt.zcu));
+        try cg.genSetMem(.{ .frame = new_frame_index }, 0, ty, temp_tracking.short, .{});
+        new_temp_index.tracking(cg).* = .init(.{ .load_frame = .{ .index = new_frame_index } });
+        // The old temp is only killed after its value has been stored.
+        try temp.die(cg);
+        cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
+        temp.* = .{ .index = new_temp_index.toIndex() };
+        return true;
     }
-    return ty;
-}
 
-fn promoteVarArg(self: *CodeGen, ty: Type) Type {
-    if (!ty.isRuntimeFloat()) return self.promoteInt(ty);
-    switch (ty.floatBits(self.target.*)) {
-        32, 64 => return .f64,
-        else => |float_bits| {
-            assert(float_bits == self.target.cTypeBitSize(.longdouble));
-            return .c_longdouble;
-        },
+    // hack around linker relocation bugs
+    /// Performs one step towards making `temp` usable as a memory base, as
+    /// judged by `short.isBase()`. Returns `false` when it already is one and
+    /// `true` when `temp` was replaced; the callers in this file loop until
+    /// `false`.
+    ///
+    /// A value that is not in memory is first spilled with `toMemory`. A value
+    /// that is in memory but still not a base has its address loaded into a
+    /// general purpose register and is then tracked as `indirect` through it.
+    fn toBase(temp: *Temp, cg: *CodeGen) !bool {
+        const temp_tracking = temp.tracking(cg);
+        if (temp_tracking.short.isBase()) return false;
+        if (try temp.toMemory(cg)) return true;
+        // `toMemory` returned false, so `temp` is unchanged and `temp_tracking` still
+        // describes it.
+        const new_temp_index = cg.next_temp_index;
+        cg.temp_type[@intFromEnum(new_temp_index)] = temp.typeOf(cg);
+        const new_reg =
+            try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
+        try cg.genSetReg(new_reg, .usize, temp_tracking.short.address(), .{});
+        new_temp_index.tracking(cg).* = .init(.{ .indirect = .{ .reg = new_reg } });
+        try temp.die(cg);
+        cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
+        temp.* = .{ .index = new_temp_index.toIndex() };
+        return true;
     }
-}
 
-fn unalignedSize(cg: *CodeGen, ty: Type) u64 {
-    const zcu = cg.pt.zcu;
-    return switch (zcu.intern_pool.indexToKey(ty.toIntern())) {
-        .vector_type => |vector_type| Type.fromInterned(vector_type.child).abiSize(zcu) * vector_type.len,
-        else => ty.abiSize(zcu),
+    /// Options accepted by the `load`, `store`, `read` and `write` helpers.
+    const AccessOptions = struct {
+        /// Byte displacement applied to the accessed location. Defaults to 0.
+        disp: i32 = 0,
+        /// When true, storing an `.undef` value fills the destination with the
+        /// byte pattern 0xaa; when false, such a store emits nothing.
+        safe: bool = false,
     };
-}
 
-fn intInfo(cg: *CodeGen, ty: Type) ?std.builtin.Type.Int {
-    const zcu = cg.pt.zcu;
-    const ip = &zcu.intern_pool;
-    var ty_index = ty.ip_index;
-    while (true) switch (ip.indexToKey(ty_index)) {
-        .int_type => |int_type| return int_type,
-        .ptr_type => |ptr_type| return switch (ptr_type.flags.size) {
-            .one, .many, .c => .{ .signedness = .unsigned, .bits = cg.target.ptrBitWidth() },
-            .slice => null,
-        },
-        .opt_type => |opt_child| return if (!Type.fromInterned(opt_child).hasRuntimeBitsIgnoreComptime(zcu))
-            .{ .signedness = .unsigned, .bits = 1 }
-        else switch (ip.indexToKey(opt_child)) {
-            .ptr_type => |ptr_type| switch (ptr_type.flags.size) {
-                .one, .many => switch (ptr_type.flags.is_allowzero) {
-                    false => .{ .signedness = .unsigned, .bits = cg.target.ptrBitWidth() },
-                    true => null,
-                },
-                .slice, .c => null,
+    /// Loads a value of type `val_ty` through the pointer held by `ptr`,
+    /// displaced by `opts.disp`, into a newly allocated temp, and returns it.
+    ///
+    /// `ptr` is modified in place: it is displaced and normalized with `toLea`,
+    /// and in the multi-register case it is advanced past every register loaded.
+    /// How the value is loaded depends on where `tempAlloc` placed the result.
+    /// Panics on any result form not handled by the switch.
+    fn load(ptr: *Temp, val_ty: Type, opts: AccessOptions, cg: *CodeGen) !Temp {
+        const val = try cg.tempAlloc(val_ty);
+        try ptr.toOffset(opts.disp, cg);
+        while (try ptr.toLea(cg)) {}
+        const val_mcv = val.tracking(cg).short;
+        switch (val_mcv) {
+            else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
+            // Single register: load into the alias sized to the value's ABI size.
+            .register => |val_reg| try ptr.loadReg(val_ty, registerAlias(
+                val_reg,
+                @intCast(val_ty.abiSize(cg.pt.zcu)),
+            ), cg),
+            // Several registers: load each in turn, advancing `ptr` by the size of
+            // the register just loaded.
+            inline .register_pair,
+            .register_triple,
+            .register_quadruple,
+            => |val_regs| for (val_regs) |val_reg| {
+                try ptr.loadReg(val_ty, val_reg, cg);
+                try ptr.toOffset(@divExact(val_reg.bitSize(), 8), cg);
+                while (try ptr.toLea(cg)) {}
             },
-            else => null,
-        },
-        .error_union_type => |error_union_type| return if (!Type.fromInterned(error_union_type.payload_type)
-            .hasRuntimeBitsIgnoreComptime(zcu)) .{ .signedness = .unsigned, .bits = zcu.errorSetBits() } else null,
-        .simple_type => |simple_type| return switch (simple_type) {
-            .bool => .{ .signedness = .unsigned, .bits = 1 },
-            .anyerror => .{ .signedness = .unsigned, .bits = zcu.errorSetBits() },
-            .isize => .{ .signedness = .signed, .bits = cg.target.ptrBitWidth() },
-            .usize => .{ .signedness = .unsigned, .bits = cg.target.ptrBitWidth() },
-            .c_char => .{ .signedness = cg.target.charSignedness(), .bits = cg.target.cTypeBitSize(.char) },
-            .c_short => .{ .signedness = .signed, .bits = cg.target.cTypeBitSize(.short) },
-            .c_ushort => .{ .signedness = .unsigned, .bits = cg.target.cTypeBitSize(.short) },
-            .c_int => .{ .signedness = .signed, .bits = cg.target.cTypeBitSize(.int) },
-            .c_uint => .{ .signedness = .unsigned, .bits = cg.target.cTypeBitSize(.int) },
-            .c_long => .{ .signedness = .signed, .bits = cg.target.cTypeBitSize(.long) },
-            .c_ulong => .{ .signedness = .unsigned, .bits = cg.target.cTypeBitSize(.long) },
-            .c_longlong => .{ .signedness = .signed, .bits = cg.target.cTypeBitSize(.longlong) },
-            .c_ulonglong => .{ .signedness = .unsigned, .bits = cg.target.cTypeBitSize(.longlong) },
-            .f16, .f32, .f64, .f80, .f128, .c_longdouble => null,
-            .anyopaque,
-            .void,
-            .type,
-            .comptime_int,
-            .comptime_float,
-            .noreturn,
-            .null,
-            .undefined,
-            .enum_literal,
-            .adhoc_inferred_error_set,
-            .generic_poison,
-            => unreachable,
-        },
-        .struct_type => {
-            const loaded_struct = ip.loadStructType(ty_index);
-            switch (loaded_struct.layout) {
-                .auto, .@"extern" => return null,
-                .@"packed" => ty_index = loaded_struct.backingIntTypeUnordered(ip),
+            // A freshly allocated result is never expected to carry a pending offset.
+            .register_offset => |val_reg_off| switch (val_reg_off.off) {
+                0 => try ptr.loadReg(val_ty, registerAlias(
+                    val_reg_off.reg,
+                    @intCast(val_ty.abiSize(cg.pt.zcu)),
+                ), cg),
+                else => unreachable,
+            },
+            // Result lives in memory: copy abiSize bytes from `ptr` to its address.
+            .memory, .indirect, .load_frame, .load_symbol => {
+                var val_ptr = try cg.tempInit(.usize, val_mcv.address());
+                var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
+                try val_ptr.memcpy(ptr, &len, cg);
+                try val_ptr.die(cg);
+                try len.die(cg);
+            },
+        }
+        return val;
+    }
+
+    /// Stores the value of `val` through the pointer held by `ptr`, displaced by
+    /// `opts.disp`. `ptr` is modified in place (displaced and normalized with
+    /// `toLea`), and `val` may be replaced by a register copy of itself.
+    ///
+    /// The body is a retry loop. A case that cannot store `val` in its current
+    /// form executes `continue :val_to_gpr`, which runs the loop's continue
+    /// expression (re-normalize `ptr`, move `val` into a general purpose
+    /// register) and then dispatches again. A case that completes falls through
+    /// to the final `break`. Panics on any value form not handled by the switch.
+    fn store(ptr: *Temp, val: *Temp, opts: AccessOptions, cg: *CodeGen) !void {
+        const val_ty = val.typeOf(cg);
+        try ptr.toOffset(opts.disp, cg);
+        while (try ptr.toLea(cg)) {}
+        val_to_gpr: while (true) : (while (try ptr.toLea(cg) or
+            try val.toRegClass(false, .general_purpose, cg))
+        {}) {
+            const val_mcv = val.tracking(cg).short;
+            switch (val_mcv) {
+                else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
+                // Undefined value: fill with 0xaa only when `opts.safe`, else emit nothing.
+                .undef => if (opts.safe) {
+                    var pat = try cg.tempInit(.u8, .{ .immediate = 0xaa });
+                    var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
+                    try ptr.memset(&pat, &len, cg);
+                    try pat.die(cg);
+                    try len.die(cg);
+                },
+                .immediate => |val_imm| {
+                    // Use an immediate operand when the value fits in an unsigned 31-bit
+                    // or a signed 32-bit encoding; otherwise retry through a register.
+                    const val_op: Immediate = if (std.math.cast(u31, val_imm)) |val_uimm31|
+                        .u(val_uimm31)
+                    else if (std.math.cast(i32, @as(i64, @bitCast(val_imm)))) |val_simm32|
+                        .s(val_simm32)
+                    else
+                        continue :val_to_gpr;
+                    // hack around linker relocation bugs
+                    switch (ptr.tracking(cg).short) {
+                        else => {},
+                        .lea_symbol => while (try ptr.toRegClass(false, .general_purpose, cg)) {},
+                    }
+                    try cg.asmMemoryImmediate(
+                        .{ ._, .mov },
+                        try ptr.tracking(cg).short.deref().mem(cg, .{
+                            .size = cg.memSize(val_ty),
+                        }),
+                        val_op,
+                    );
+                },
+                // Condition flags: materialize the condition as one byte with setcc.
+                .eflags => |cc| {
+                    // hack around linker relocation bugs
+                    switch (ptr.tracking(cg).short) {
+                        else => {},
+                        .lea_symbol => while (try ptr.toRegClass(false, .general_purpose, cg)) {},
+                    }
+                    try cg.asmSetccMemory(
+                        cc,
+                        try ptr.tracking(cg).short.deref().mem(cg, .{ .size = .byte }),
+                    );
+                },
+                .register => |val_reg| try ptr.storeRegs(val_ty, &.{registerAlias(
+                    val_reg,
+                    @intCast(val_ty.abiSize(cg.pt.zcu)),
+                )}, cg),
+                inline .register_pair,
+                .register_triple,
+                .register_quadruple,
+                => |val_regs| try ptr.storeRegs(val_ty, &val_regs, cg),
+                // A pending non-zero offset must be materialized in a register first.
+                .register_offset => |val_reg_off| switch (val_reg_off.off) {
+                    0 => try ptr.storeRegs(val_ty, &.{registerAlias(
+                        val_reg_off.reg,
+                        @intCast(val_ty.abiSize(cg.pt.zcu)),
+                    )}, cg),
+                    else => continue :val_to_gpr,
+                },
+                // Register plus flag: `val_ty` is either a two-field tuple whose second
+                // field is `u1`, or an optional that is not payload-represented. The
+                // first field/payload is stored from the register, and the flag byte is
+                // written with setcc right after it.
+                .register_overflow => |val_reg_ov| {
+                    const ip = &cg.pt.zcu.intern_pool;
+                    const first_ty: Type = .fromInterned(first_ty: switch (ip.indexToKey(val_ty.toIntern())) {
+                        .tuple_type => |tuple_type| {
+                            const tuple_field_types = tuple_type.types.get(ip);
+                            assert(tuple_field_types.len == 2 and tuple_field_types[1] == .u1_type);
+                            break :first_ty tuple_field_types[0];
+                        },
+                        .opt_type => |opt_child| {
+                            assert(!val_ty.optionalReprIsPayload(cg.pt.zcu));
+                            break :first_ty opt_child;
+                        },
+                        else => std.debug.panic("{s}: {}\n", .{ @src().fn_name, val_ty.fmt(cg.pt) }),
+                    });
+                    const first_size: u31 = @intCast(first_ty.abiSize(cg.pt.zcu));
+                    try ptr.storeRegs(first_ty, &.{registerAlias(val_reg_ov.reg, first_size)}, cg);
+                    try ptr.toOffset(first_size, cg);
+                    try cg.asmSetccMemory(
+                        val_reg_ov.eflags,
+                        try ptr.tracking(cg).short.deref().mem(cg, .{ .size = .byte }),
+                    );
+                },
+                // Address values are materialized in a register before being stored.
+                .lea_frame, .lea_symbol => continue :val_to_gpr,
+                // Value lives in memory: copy abiSize bytes from its address to `ptr`.
+                .memory, .indirect, .load_frame, .load_symbol => {
+                    var val_ptr = try cg.tempInit(.usize, val_mcv.address());
+                    var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
+                    try ptr.memcpy(&val_ptr, &len, cg);
+                    try val_ptr.die(cg);
+                    try len.die(cg);
+                },
             }
-        },
-        .union_type => return switch (ip.loadUnionType(ty_index).flagsUnordered(ip).layout) {
-            .auto, .@"extern" => null,
-            .@"packed" => .{ .signedness = .unsigned, .bits = @intCast(ty.bitSize(zcu)) },
-        },
-        .enum_type => ty_index = ip.loadEnumType(ty_index).tag_ty,
-        .error_set_type, .inferred_error_set_type => return .{ .signedness = .unsigned, .bits = zcu.errorSetBits() },
-        else => return null,
-    };
-}
+            break;
+        }
+    }
 
-fn floatBits(cg: *CodeGen, ty: Type) ?u16 {
-    return if (ty.isRuntimeFloat()) ty.floatBits(cg.target.*) else null;
-}
+    /// Reads a value of type `val_ty` out of the value held by `src`, starting at
+    /// byte displacement `opts.disp`, into a newly allocated temp, and returns it.
+    ///
+    /// `src` is first converted in place to a base form with `toBase`. How the
+    /// value is read depends on where `tempAlloc` placed the result. Panics on
+    /// any result form not handled by the switch.
+    fn read(src: *Temp, val_ty: Type, opts: AccessOptions, cg: *CodeGen) !Temp {
+        var val = try cg.tempAlloc(val_ty);
+        while (try src.toBase(cg)) {}
+        const val_mcv = val.tracking(cg).short;
+        switch (val_mcv) {
+            else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
+            // Single register: read into the alias sized to the value's ABI size.
+            .register => |val_reg| try src.readReg(opts.disp, val_ty, registerAlias(
+                val_reg,
+                @intCast(val_ty.abiSize(cg.pt.zcu)),
+            ), cg),
+            // Several registers: read each in turn at increasing displacements, stepping
+            // by the size of the register just read.
+            inline .register_pair, .register_triple, .register_quadruple => |val_regs| {
+                var disp = opts.disp;
+                for (val_regs) |val_reg| {
+                    try src.readReg(disp, val_ty, val_reg, cg);
+                    disp += @divExact(val_reg.bitSize(), 8);
+                }
+            },
+            // A freshly allocated result is never expected to carry a pending offset.
+            .register_offset => |val_reg_off| switch (val_reg_off.off) {
+                0 => try src.readReg(opts.disp, val_ty, registerAlias(
+                    val_reg_off.reg,
+                    @intCast(val_ty.abiSize(cg.pt.zcu)),
+                ), cg),
+                else => unreachable,
+            },
+            // Result lives in memory: copy abiSize bytes from the displaced source
+            // address to the result's address.
+            .memory, .indirect, .load_frame, .load_symbol => {
+                var val_ptr = try cg.tempInit(.usize, val_mcv.address());
+                var src_ptr =
+                    try cg.tempInit(.usize, src.tracking(cg).short.address().offset(opts.disp));
+                var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
+                try val_ptr.memcpy(&src_ptr, &len, cg);
+                try val_ptr.die(cg);
+                try src_ptr.die(cg);
+                try len.die(cg);
+            },
+        }
+        return val;
+    }
 
-const Temp = struct {
-    index: Air.Inst.Index,
+    fn write(dst: *Temp, val: *Temp, opts: AccessOptions, cg: *CodeGen) !void { // stores the value tracked by `val` into the memory tracked by `dst`, displaced by `opts.disp`
+        const val_ty = val.typeOf(cg);
+        while (try dst.toBase(cg)) {} // repeat until `toBase` returns false
+        val_to_gpr: while (true) : (while (try dst.toBase(cg) or // continue expression: only runs on `continue :val_to_gpr`
+            try val.toRegClass(false, .general_purpose, cg)) // requests `val` in a general purpose register before the switch is retried
+        {}) {
+            const val_mcv = val.tracking(cg).short;
+            switch (val_mcv) {
+                else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), // value locations not listed below are not handled here
+                .undef => if (opts.safe) { // undefined value: nothing is emitted unless `opts.safe` is set
+                    var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address().offset(opts.disp));
+                    var pat = try cg.tempInit(.u8, .{ .immediate = 0xaa }); // fill byte
+                    var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
+                    try dst_ptr.memset(&pat, &len, cg); // fill the whole ABI size of the value with the pattern
+                    try dst_ptr.die(cg);
+                    try pat.die(cg);
+                    try len.die(cg);
+                },
+                .immediate => |val_imm| {
+                    const val_op: Immediate = if (std.math.cast(u31, val_imm)) |val_uimm31| // fits in 31 unsigned bits
+                        .u(val_uimm31)
+                    else if (std.math.cast(i32, @as(i64, @bitCast(val_imm)))) |val_simm32| // fits in a signed 32-bit immediate
+                        .s(val_simm32)
+                    else
+                        continue :val_to_gpr; // too wide for an immediate operand: go through a register instead
+                    try cg.asmMemoryImmediate(
+                        .{ ._, .mov },
+                        try dst.tracking(cg).short.mem(cg, .{
+                            .size = cg.memSize(val_ty),
+                            .disp = opts.disp,
+                        }),
+                        val_op,
+                    );
+                },
+                .eflags => |cc| try cg.asmSetccMemory( // condition code value: materialized as a single byte
+                    cc,
+                    try dst.tracking(cg).short.mem(cg, .{
+                        .size = .byte,
+                        .disp = opts.disp,
+                    }),
+                ),
+                .register => |val_reg| try dst.writeRegs(opts.disp, val_ty, &.{registerAlias(
+                    val_reg,
+                    @intCast(val_ty.abiSize(cg.pt.zcu)),
+                )}, cg),
+                inline .register_pair,
+                .register_triple,
+                .register_quadruple,
+                => |val_regs| try dst.writeRegs(opts.disp, val_ty, &val_regs, cg), // multi-register values are written part by part
+                .register_offset => |val_reg_off| switch (val_reg_off.off) {
+                    0 => try dst.writeRegs(opts.disp, val_ty, &.{registerAlias( // zero offset: same as a plain register
+                        val_reg_off.reg,
+                        @intCast(val_ty.abiSize(cg.pt.zcu)),
+                    )}, cg),
+                    else => continue :val_to_gpr, // nonzero offset: retry once the value is in a register
+                },
+                .register_overflow => |val_reg_ov| {
+                    const ip = &cg.pt.zcu.intern_pool;
+                    const first_ty: Type = .fromInterned(first_ty: switch (ip.indexToKey(val_ty.toIntern())) { // type of the part held in the register
+                        .tuple_type => |tuple_type| {
+                            const tuple_field_types = tuple_type.types.get(ip);
+                            assert(tuple_field_types.len == 2 and tuple_field_types[1] == .u1_type); // only two-field tuples ending in `u1` are expected
+                            break :first_ty tuple_field_types[0];
+                        },
+                        .opt_type => |opt_child| {
+                            assert(!val_ty.optionalReprIsPayload(cg.pt.zcu)); // optional must not be represented by its payload alone
+                            break :first_ty opt_child;
+                        },
+                        else => std.debug.panic("{s}: {}\n", .{ @src().fn_name, val_ty.fmt(cg.pt) }),
+                    });
+                    const first_size: u31 = @intCast(first_ty.abiSize(cg.pt.zcu));
+                    try dst.writeRegs(opts.disp, first_ty, &.{registerAlias(val_reg_ov.reg, first_size)}, cg); // first part comes from the register
+                    try cg.asmSetccMemory( // second part: one byte set from the flags, placed right after the first part
+                        val_reg_ov.eflags,
+                        try dst.tracking(cg).short.mem(cg, .{
+                            .size = .byte,
+                            .disp = opts.disp + first_size,
+                        }),
+                    );
+                },
+                .lea_frame, .lea_symbol => continue :val_to_gpr, // address values: retry once the value is in a register
+                .memory, .indirect, .load_frame, .load_symbol => { // value already lives in memory: copy its ABI size with memcpy
+                    var dst_ptr =
+                        try cg.tempInit(.usize, dst.tracking(cg).short.address().offset(opts.disp));
+                    var val_ptr = try cg.tempInit(.usize, val_mcv.address());
+                    var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
+                    try dst_ptr.memcpy(&val_ptr, &len, cg);
+                    try dst_ptr.die(cg);
+                    try val_ptr.die(cg);
+                    try len.die(cg);
+                },
+            }
+            break; // reached only when a prong did not `continue`, so the store has been handled
+        }
+    }
 
-    fn unwrap(temp: Temp, cg: *CodeGen) union(enum) {
-        ref: Air.Inst.Ref,
-        temp: Index,
-        err_ret_trace,
-    } {
-        switch (temp.index.unwrap()) {
-            .ref => |ref| return .{ .ref = ref },
-            .target => |target_index| {
-                if (temp.index == err_ret_trace_index) return .err_ret_trace;
-                const temp_index: Index = @enumFromInt(target_index);
-                assert(temp_index.isValid(cg));
-                return .{ .temp = temp_index };
-            },
+    /// Loads a `dst_ty` value into `dst_reg` from the memory obtained by
+    /// dereferencing the location tracked by `ptr`.
+    fn loadReg(ptr: *Temp, dst_ty: Type, dst_reg: Register, cg: *CodeGen) !void {
+        const reg_class = dst_reg.class();
+        const move = try cg.moveStrategy(dst_ty, reg_class, false);
+        // hack around linker relocation bugs
+        switch (ptr.tracking(cg).short) {
+            .lea_symbol => |symbol| {
+                // A symbol address is only dereferenced in place for a general
+                // purpose destination with a zero symbol offset.
+                const direct_ok = reg_class == .general_purpose and symbol.off == 0;
+                if (!direct_ok) {
+                    while (try ptr.toRegClass(false, .general_purpose, cg)) {}
+                }
+            },
+            else => {},
+        }
+        // Access no more bits than the value occupies or the register holds.
+        const load_bits = @min(8 * dst_ty.abiSize(cg.pt.zcu), dst_reg.bitSize());
+        const src_mem = try ptr.tracking(cg).short.deref().mem(cg, .{
+            .size = .fromBitSize(load_bits),
+        });
+        try move.read(cg, dst_reg, src_mem);
+    }
+
+    fn storeRegs(ptr: *Temp, src_ty: Type, src_regs: []const Register, cg: *CodeGen) !void { // stores the `src_ty` value held in `src_regs`, part by part, to the memory `ptr` points at
+        var part_disp: u31 = 0; // byte offset of the current part from the start of the value
+        var deferred_disp: u31 = 0; // bytes stored since `ptr` was last advanced with `toOffset`
+        var src_abi_size: u32 = @intCast(src_ty.abiSize(cg.pt.zcu)); // bytes of the value not yet stored
+        for (src_regs) |src_reg| {
+            const src_rc = src_reg.class();
+            const part_bit_size = @min(8 * src_abi_size, src_reg.bitSize()); // a part never exceeds the register or the remaining bytes
+            const part_size = @divExact(part_bit_size, 8);
+            if (src_rc == .x87 or std.math.isPowerOfTwo(part_size)) { // x87 registers and power-of-two sized parts go straight through the move strategy
+                // hack around linker relocation bugs
+                switch (ptr.tracking(cg).short) {
+                    else => {},
+                    .lea_symbol => while (try ptr.toRegClass(false, .general_purpose, cg)) {},
+                }
+                const strat = try cg.moveStrategy(src_ty, src_rc, false);
+                try strat.write(cg, try ptr.tracking(cg).short.deref().mem(cg, .{
+                    .size = .fromBitSize(part_bit_size),
+                    .disp = part_disp,
+                }), registerAlias(src_reg, part_size));
+            } else { // other sizes: write the register to a frame slot first, then memcpy from it
+                const frame_size = std.math.ceilPowerOfTwoAssert(u32, part_size); // slot size rounded up to a power of two
+                const frame_index = try cg.allocFrameIndex(.init(.{
+                    .size = frame_size,
+                    .alignment = .fromNonzeroByteUnits(frame_size),
+                }));
+                const strat = try cg.moveStrategy(src_ty, src_rc, true);
+                try strat.write(cg, .{
+                    .base = .{ .frame = frame_index },
+                    .mod = .{ .rm = .{ .size = .fromSize(frame_size) } },
+                }, registerAlias(src_reg, frame_size));
+                try ptr.toOffset(deferred_disp, cg); // advance `ptr` past the bytes accumulated in `deferred_disp`
+                deferred_disp = 0;
+                var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } });
+                var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size }); // copy length is the remaining byte count
+                try ptr.memcpy(&src_ptr, &len, cg);
+                try src_ptr.die(cg);
+                try len.die(cg);
+            }
+            part_disp += part_size;
+            deferred_disp += part_size;
+            src_abi_size -= part_size;
         }
     }
 
-    fn typeOf(temp: Temp, cg: *CodeGen) Type {
-        return switch (temp.unwrap(cg)) {
-            .ref => switch (cg.air.instructions.items(.tag)[@intFromEnum(temp.index)]) {
-                .loop_switch_br => cg.typeOf(cg.air.unwrapSwitch(temp.index).operand),
-                else => cg.air.typeOfIndex(temp.index, &cg.pt.zcu.intern_pool),
-            },
-            .temp => |temp_index| temp_index.typeOf(cg),
-            .err_ret_trace => .usize,
-        };
+    /// Loads a `dst_ty` value into `dst_reg` from the memory tracked by `src`,
+    /// displaced by `disp` bytes.
+    fn readReg(src: Temp, disp: i32, dst_ty: Type, dst_reg: Register, cg: *CodeGen) !void {
+        const move = try cg.moveStrategy(dst_ty, dst_reg.class(), false);
+        // Access no more bits than the value occupies or the register holds.
+        const load_bits = @min(8 * dst_ty.abiSize(cg.pt.zcu), dst_reg.bitSize());
+        const src_mem = try src.tracking(cg).short.mem(cg, .{
+            .size = .fromBitSize(load_bits),
+            .disp = disp,
+        });
+        try move.read(cg, dst_reg, src_mem);
     }
 
-    fn isMut(temp: Temp, cg: *CodeGen) bool {
-        return switch (temp.unwrap(cg)) {
-            .ref, .err_ret_trace => false,
-            .temp => |temp_index| switch (temp_index.tracking(cg).short) {
-                .none,
-                .unreach,
-                .dead,
-                .undef,
-                .immediate,
-                .eflags,
-                .register_offset,
-                .register_mask,
-                .memory,
-                .load_symbol,
-                .lea_symbol,
-                .indirect,
-                .load_direct,
-                .lea_direct,
-                .load_got,
-                .lea_got,
-                .load_tlv,
-                .lea_tlv,
-                .lea_frame,
-                .elementwise_regs_then_frame,
-                .reserved_frame,
-                .air_ref,
-                => false,
-                .register,
-                .register_pair,
-                .register_triple,
-                .register_quadruple,
-                .register_overflow,
-                => true,
-                .load_frame => |frame_addr| !frame_addr.index.isNamed(),
-            },
-        };
+    fn writeRegs(dst: Temp, disp: i32, src_ty: Type, src_regs: []const Register, cg: *CodeGen) !void { // stores the `src_ty` value held in `src_regs`, part by part, into the memory tracked by `dst` starting at `disp`
+        var part_disp = disp; // displacement of the current part within `dst`
+        var src_abi_size: u32 = @intCast(src_ty.abiSize(cg.pt.zcu)); // bytes of the value not yet stored
+        for (src_regs) |src_reg| {
+            const src_rc = src_reg.class();
+            const part_bit_size = @min(8 * src_abi_size, src_reg.bitSize()); // a part never exceeds the register or the remaining bytes
+            const part_size = @divExact(part_bit_size, 8);
+            if (src_rc == .x87 or std.math.isPowerOfTwo(part_size)) { // x87 registers and power-of-two sized parts go straight through the move strategy
+                const strat = try cg.moveStrategy(src_ty, src_rc, false);
+                try strat.write(cg, try dst.tracking(cg).short.mem(cg, .{
+                    .size = .fromBitSize(part_bit_size),
+                    .disp = part_disp,
+                }), registerAlias(src_reg, part_size));
+            } else { // other sizes: write the register to a frame slot first, then memcpy from it
+                const frame_size = std.math.ceilPowerOfTwoAssert(u32, part_size); // slot size rounded up to a power of two
+                const frame_index = try cg.allocFrameIndex(.init(.{
+                    .size = frame_size,
+                    .alignment = .fromNonzeroByteUnits(frame_size),
+                }));
+                const strat = try cg.moveStrategy(src_ty, src_rc, true);
+                try strat.write(cg, .{
+                    .base = .{ .frame = frame_index },
+                    .mod = .{ .rm = .{ .size = .fromSize(frame_size) } },
+                }, registerAlias(src_reg, frame_size));
+                var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address()); // pointer temp built from the address of `dst`
+                try dst_ptr.toOffset(part_disp, cg);
+                var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } });
+                var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size }); // copy length is the remaining byte count
+                try dst_ptr.memcpy(&src_ptr, &len, cg);
+                try dst_ptr.die(cg);
+                try src_ptr.die(cg);
+                try len.die(cg);
+            }
+            part_disp += part_size;
+            src_abi_size -= part_size;
+        }
     }
 
-    fn tracking(temp: Temp, cg: *CodeGen) InstTracking {
-        return cg.inst_tracking.get(temp.index).?;
+    /// Places `dst`, `src` and `len` in `rdi`, `rsi` and `rcx` respectively, then
+    /// emits the `.@"rep _sb"` / `.mov` instruction (presumably `rep movsb`).
+    fn memcpy(dst: *Temp, src: *Temp, len: *Temp, cg: *CodeGen) !void {
+        const operands = [_]*Temp{ dst, src, len };
+        const fixed_regs = [_]Register{ .rdi, .rsi, .rcx };
+        // Start over from the first operand whenever `toReg` returns true;
+        // finish once a full pass returns false for every operand.
+        place: while (true) {
+            for (operands, fixed_regs) |operand, fixed_reg| {
+                if (try operand.toReg(fixed_reg, cg)) continue :place;
+            }
+            break;
+        }
+        try cg.asmOpOnly(.{ .@"rep _sb", .mov });
     }
 
-    fn getOffset(temp: Temp, off: i32, cg: *CodeGen) !Temp {
-        const new_temp_index = cg.next_temp_index;
-        cg.temp_type[@intFromEnum(new_temp_index)] = .usize;
-        cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
-        switch (temp.tracking(cg).short) {
-            else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
-            .register => |reg| {
-                const new_reg =
-                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
-                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
-                try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{
-                    .base = .{ .reg = reg.to64() },
-                    .mod = .{ .rm = .{
-                        .size = .qword,
-                        .disp = off,
+    /// Places `dst`, `val` and `len` in `rdi`, `rax` and `rcx` respectively, then
+    /// emits the `.@"rep _sb"` / `.sto` instruction (presumably `rep stosb`).
+    fn memset(dst: *Temp, val: *Temp, len: *Temp, cg: *CodeGen) !void {
+        const operands = [_]*Temp{ dst, val, len };
+        const fixed_regs = [_]Register{ .rdi, .rax, .rcx };
+        // Start over from the first operand whenever `toReg` returns true;
+        // finish once a full pass returns false for every operand.
+        place: while (true) {
+            for (operands, fixed_regs) |operand, fixed_reg| {
+                if (try operand.toReg(fixed_reg, cg)) continue :place;
+            }
+            break;
+        }
+        try cg.asmOpOnly(.{ .@"rep _sb", .sto });
+    }
+
+    /// Supports any `op` using `cg.intInfo(lhs.typeOf(cg)).?.signedness` as the signedness.
+    /// Returns `error.SelectFailed` when `cg.intInfo(lhs.typeOf(cg)) == null`.
+    fn cmpInts(lhs: *Temp, op: std.math.CompareOperator, rhs: *Temp, cg: *CodeGen) !Temp {
+        var ops: [2]Temp = .{ lhs.*, rhs.* };
+        var res: [1]Temp = undefined;
+        switch (op) {
+            .lt, .lte, .gte, .gt => {
+                const commute = switch (op) {
+                    .lt, .gte => false,
+                    .lte, .gt => true,
+                    else => unreachable,
+                };
+                if (commute) std.mem.swap(Temp, &ops[0], &ops[1]);
+                try cg.select(&res, &.{.bool}, &ops, comptime &.{ .{
+                    .src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte } },
+                    .patterns = &.{
+                        .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .imm8, .to_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
+                    },
+                    .dst_temps = .{.{ .cc = .g }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
                     } },
-                });
-            },
-            .register_offset => |reg_off| {
-                const new_reg =
-                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
-                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
-                try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{
-                    .base = .{ .reg = reg_off.reg.to64() },
-                    .mod = .{ .rm = .{
-                        .size = .qword,
-                        .disp = reg_off.off + off,
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte } },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm8 } },
+                        .{ .src = .{ .to_gpr, .imm8 } },
+                        .{ .src = .{ .to_gpr, .mem } },
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .cc = .l }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte } },
+                    .patterns = &.{
+                        .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .imm8, .to_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
+                    },
+                    .dst_temps = .{.{ .cc = .a }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte } },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm8 } },
+                        .{ .src = .{ .to_gpr, .imm8 } },
+                        .{ .src = .{ .to_gpr, .mem } },
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .cc = .b }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word } },
+                    .patterns = &.{
+                        .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .imm16, .to_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
+                    },
+                    .dst_temps = .{.{ .cc = .g }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word } },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm16 } },
+                        .{ .src = .{ .to_gpr, .imm16 } },
+                        .{ .src = .{ .to_gpr, .mem } },
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .cc = .l }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word } },
+                    .patterns = &.{
+                        .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .imm16, .to_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
+                    },
+                    .dst_temps = .{.{ .cc = .a }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word } },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm16 } },
+                        .{ .src = .{ .to_gpr, .imm16 } },
+                        .{ .src = .{ .to_gpr, .mem } },
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .cc = .b }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .dword }, .{ .signed_int = .dword } },
+                    .patterns = &.{
+                        .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .imm32, .to_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
+                    },
+                    .dst_temps = .{.{ .cc = .g }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .dword }, .{ .signed_int = .dword } },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm32 } },
+                        .{ .src = .{ .to_gpr, .imm32 } },
+                        .{ .src = .{ .to_gpr, .mem } },
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .cc = .l }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword } },
+                    .patterns = &.{
+                        .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .imm32, .to_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
+                    },
+                    .dst_temps = .{.{ .cc = .a }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword } },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm32 } },
+                        .{ .src = .{ .to_gpr, .imm32 } },
+                        .{ .src = .{ .to_gpr, .mem } },
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .cc = .b }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .signed_int = .qword }, .{ .signed_int = .qword } },
+                    .patterns = &.{
+                        .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .simm32, .to_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
+                    },
+                    .dst_temps = .{.{ .cc = .g }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .signed_int = .qword }, .{ .signed_int = .qword } },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .simm32 } },
+                        .{ .src = .{ .to_gpr, .simm32 } },
+                        .{ .src = .{ .to_gpr, .mem } },
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .cc = .l }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .qword }, .{ .unsigned_int = .qword } },
+                    .patterns = &.{
+                        .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .simm32, .to_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
+                    },
+                    .dst_temps = .{.{ .cc = .a }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .qword }, .{ .unsigned_int = .qword } },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .simm32 } },
+                        .{ .src = .{ .to_gpr, .simm32 } },
+                        .{ .src = .{ .to_gpr, .mem } },
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .cc = .b }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{
+                        .{ .signed_remainder_int = .{ .of = .qword, .is = .qword } },
+                        .{ .signed_remainder_int = .{ .of = .qword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i64, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .l }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sia(1, .src0, .sub_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsiad(.src0q, .@"8", .tmp0, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memsiad(.src1q, .@"8", .tmp0, .add_size, -8), ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1q, .memad(.src0q, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memad(.src1q, .add_size, -8), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{
+                        .{ .unsigned_remainder_int = .{ .of = .qword, .is = .qword } },
+                        .{ .unsigned_remainder_int = .{ .of = .qword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .b }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memsia(.src1q, .@"8", .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{
+                        .{ .signed_remainder_int = .{ .of = .dword, .is = .dword } },
+                        .{ .signed_remainder_int = .{ .of = .dword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .l }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sia(1, .src0, .sub_size_div_4), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsiad(.src0q, .@"4", .tmp0, .add_size, -4), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memsiad(.src1q, .@"4", .tmp0, .add_size, -4), ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1q, .memad(.src0q, .add_size, -4), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memad(.src1q, .add_size, -4), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{
+                        .{ .unsigned_remainder_int = .{ .of = .dword, .is = .dword } },
+                        .{ .unsigned_remainder_int = .{ .of = .dword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .b }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size_div_4), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsia(.src0q, .@"4", .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memsia(.src1q, .@"4", .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
-                });
-            },
-            .lea_symbol => |sym_off| new_temp_index.tracking(cg).* = .init(.{ .lea_symbol = .{
-                .sym_index = sym_off.sym_index,
-                .off = sym_off.off + off,
-            } }),
-            .load_frame => |frame_addr| {
-                const new_reg =
-                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
-                new_temp_index.tracking(cg).* = .init(.{ .register_offset = .{
-                    .reg = new_reg,
-                    .off = off,
                 } });
-                try cg.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{
-                    .base = .{ .frame = frame_addr.index },
-                    .mod = .{ .rm = .{
-                        .size = .qword,
-                        .disp = frame_addr.off,
+                if (commute) std.mem.swap(Temp, &ops[0], &ops[1]);
+            },
+            .eq, .neq => {
+                try cg.select(&res, &.{.bool}, &ops, comptime &.{ .{
+                    .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm8 } },
+                        .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .imm8 } },
+                        .{ .src = .{ .imm8, .to_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .cc = .e }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
                     } },
-                });
-            },
-            .lea_frame => |frame_addr| new_temp_index.tracking(cg).* = .init(.{ .lea_frame = .{
-                .index = frame_addr.index,
-                .off = frame_addr.off + off,
-            } }),
-        }
-        return .{ .index = new_temp_index.toIndex() };
-    }
-
-    fn toOffset(temp: *Temp, off: i32, cg: *CodeGen) !void {
-        if (off == 0) return;
-        switch (temp.unwrap(cg)) {
-            .ref, .err_ret_trace => {},
-            .temp => |temp_index| {
-                const temp_tracking = temp_index.tracking(cg);
-                switch (temp_tracking.short) {
-                    else => {},
-                    .register => |reg| {
-                        try cg.freeValue(temp_tracking.long);
-                        temp_tracking.* = .init(.{ .register_offset = .{
-                            .reg = reg,
-                            .off = off,
-                        } });
-                        return;
+                }, .{
+                    .src_constraints = .{ .{ .int = .word }, .{ .int = .word } },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm16 } },
+                        .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .imm16 } },
+                        .{ .src = .{ .imm16, .to_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .to_gpr } },
                     },
-                    .register_offset => |reg_off| {
-                        try cg.freeValue(temp_tracking.long);
-                        temp_tracking.* = .init(.{ .register_offset = .{
-                            .reg = reg_off.reg,
-                            .off = reg_off.off + off,
-                        } });
-                        return;
+                    .dst_temps = .{.{ .cc = .e }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm32 } },
+                        .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .imm32 } },
+                        .{ .src = .{ .imm32, .to_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .to_gpr } },
                     },
-                    .lea_symbol => |sym_off| {
-                        assert(std.meta.eql(temp_tracking.long.lea_symbol, sym_off));
-                        temp_tracking.* = .init(.{ .lea_symbol = .{
-                            .sym_index = sym_off.sym_index,
-                            .off = sym_off.off + off,
-                        } });
-                        return;
+                    .dst_temps = .{.{ .cc = .e }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .simm32 } },
+                        .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .simm32 } },
+                        .{ .src = .{ .simm32, .to_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .to_gpr } },
                     },
-                    .lea_frame => |frame_addr| {
-                        assert(std.meta.eql(temp_tracking.long.lea_frame, frame_addr));
-                        temp_tracking.* = .init(.{ .lea_frame = .{
-                            .index = frame_addr.index,
-                            .off = frame_addr.off + off,
-                        } });
-                        return;
+                    .dst_temps = .{.{ .cc = .e }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse, .mmx, null, null },
+                    .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_mm, .mem } },
+                        .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_mm, .to_mm } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .rc = .mmx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ },
+                        .{ ._, .p_, .xor, .src0q, .src1q, ._, ._ },
+                        .{ ._, .p_b, .cmpeq, .tmp1q, .src0q, ._, ._ },
+                        .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ },
+                        .{ ._, ._, .cmp, .tmp0b, .si(-1), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_xmm, .mem } },
+                        .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_xmm, .to_xmm } },
+                    },
+                    .extra_temps = .{
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
-                }
-            },
-        }
-        const new_temp = try temp.getOffset(off, cg);
-        try temp.die(cg);
-        temp.* = new_temp;
-    }
-
-    fn getLimb(temp: Temp, limb_ty: Type, limb_index: u28, cg: *CodeGen) !Temp {
-        const new_temp_index = cg.next_temp_index;
-        cg.temp_type[@intFromEnum(new_temp_index)] = limb_ty;
-        switch (temp.tracking(cg).short) {
-            else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
-            .immediate => |imm| {
-                assert(limb_index == 0);
-                new_temp_index.tracking(cg).* = .init(.{ .immediate = imm });
-            },
-            .register => |reg| {
-                assert(limb_index == 0);
-                const new_reg =
-                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
-                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
-                try cg.asmRegisterRegister(.{ ._, .mov }, new_reg.to64(), reg.to64());
-            },
-            .register_pair => |regs| {
-                const new_reg =
-                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
-                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
-                try cg.asmRegisterRegister(.{ ._, .mov }, new_reg.to64(), regs[limb_index].to64());
-            },
-            .register_offset => |reg_off| {
-                assert(limb_index == 0);
-                const new_reg =
-                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
-                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
-                try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{
-                    .base = .{ .reg = reg_off.reg.to64() },
-                    .mod = .{ .rm = .{
-                        .size = .qword,
-                        .disp = reg_off.off + @as(u31, limb_index) * 8,
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_, .xor, .tmp0x, .src0x, .src1x, ._ },
+                        .{ ._, .vp_, .@"test", .tmp0x, .tmp0x, ._, ._ },
                     } },
-                });
-            },
-            .load_symbol => |sym_off| {
-                const new_reg =
-                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
-                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
-                try cg.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{
-                    .base = .{ .reloc = sym_off.sym_index },
-                    .mod = .{ .rm = .{
-                        .size = .qword,
-                        .disp = sym_off.off + @as(u31, limb_index) * 8,
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_xmm, .mem } },
+                        .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_xmm, .to_xmm } },
+                    },
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ },
+                        .{ ._, .p_, .@"test", .src0x, .src0x, ._, ._ },
                     } },
-                });
-            },
-            .lea_symbol => |sym_off| {
-                assert(limb_index == 0);
-                new_temp_index.tracking(cg).* = .init(.{ .lea_symbol = sym_off });
-            },
-            .load_frame => |frame_addr| {
-                const new_reg =
-                    try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
-                new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
-                try cg.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{
-                    .base = .{ .frame = frame_addr.index },
-                    .mod = .{ .rm = .{
-                        .size = .qword,
-                        .disp = frame_addr.off + @as(u31, limb_index) * 8,
+                }, .{
+                    .required_features = .{ .sse2, .fast_imm16, null, null },
+                    .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_xmm, .mem } },
+                        .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_xmm, .to_xmm } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
+                        .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ },
+                        .{ ._, .p_b, .cmpeq, .tmp1x, .src0x, ._, ._ },
+                        .{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ },
+                        .{ ._, ._, .cmp, .tmp0w, .si(-1), ._, ._ },
                     } },
-                });
-            },
-            .lea_frame => |frame_addr| {
-                assert(limb_index == 0);
-                new_temp_index.tracking(cg).* = .init(.{ .lea_frame = frame_addr });
-            },
-        }
-        cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
-        return .{ .index = new_temp_index.toIndex() };
-    }
-
-    fn toLimb(temp: *Temp, limb_ty: Type, limb_index: u28, cg: *CodeGen) !void {
-        switch (temp.unwrap(cg)) {
-            .ref => {},
-            .temp => |temp_index| {
-                const temp_tracking = temp_index.tracking(cg);
-                switch (temp_tracking.short) {
-                    else => {},
-                    .register, .lea_symbol, .lea_frame => {
-                        assert(limb_index == 0);
-                        cg.temp_type[@intFromEnum(temp_index)] = limb_ty;
-                        return;
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_xmm, .mem } },
+                        .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_xmm, .to_xmm } },
                     },
-                    .register_pair => |regs| {
-                        switch (temp_tracking.long) {
-                            .none, .reserved_frame => {},
-                            else => temp_tracking.long =
-                                temp_tracking.long.address().offset(@as(u31, limb_index) * 8).deref(),
-                        }
-                        for (regs, 0..) |reg, reg_index| if (reg_index != limb_index)
-                            cg.register_manager.freeReg(reg);
-                        temp_tracking.* = .init(.{ .register = regs[limb_index] });
-                        cg.temp_type[@intFromEnum(temp_index)] = limb_ty;
-                        return;
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
-                    .load_symbol => |sym_off| {
-                        assert(std.meta.eql(temp_tracking.long.load_symbol, sym_off));
-                        temp_tracking.* = .init(.{ .load_symbol = .{
-                            .sym_index = sym_off.sym_index,
-                            .off = sym_off.off + @as(u31, limb_index) * 8,
-                        } });
-                        cg.temp_type[@intFromEnum(temp_index)] = limb_ty;
-                        return;
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
+                        .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ },
+                        .{ ._, .p_b, .cmpeq, .tmp1x, .src0x, ._, ._ },
+                        .{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ },
+                        .{ ._, ._, .xor, .tmp0d, .si(0xffff), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_ymm, .mem } },
+                        .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_ymm, .to_ymm } },
                     },
-                    .load_frame => |frame_addr| if (!frame_addr.index.isNamed()) {
-                        assert(std.meta.eql(temp_tracking.long.load_frame, frame_addr));
-                        temp_tracking.* = .init(.{ .load_frame = .{
-                            .index = frame_addr.index,
-                            .off = frame_addr.off + @as(u31, limb_index) * 8,
-                        } });
-                        cg.temp_type[@intFromEnum(temp_index)] = limb_ty;
-                        return;
+                    .extra_temps = .{
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
-                }
-            },
-            .err_ret_trace => unreachable,
-        }
-        const new_temp = try temp.getLimb(limb_ty, limb_index, cg);
-        try temp.die(cg);
-        temp.* = new_temp;
-    }
-
-    fn toSlicePtr(temp: *Temp, cg: *CodeGen) !void { // Narrows a slice-typed temp to its pointer field; temps of any other type are left untouched.
-        const temp_ty = temp.typeOf(cg);
-        if (temp_ty.isSlice(cg.pt.zcu)) try temp.toLimb(temp_ty.slicePtrFieldType(cg.pt.zcu), 0, cg); // the pointer is requested as limb 0
-    }
-
-    fn toSliceLen(temp: *Temp, cg: *CodeGen) !void { // Narrows the temp to limb 1, retyped as `usize`; unlike `toSlicePtr`, no slice check is performed here.
-        try temp.toLimb(.usize, 1, cg);
-    }
-
-    fn toReg(temp: *Temp, new_reg: Register, cg: *CodeGen) !bool { // Copies the value into exactly `new_reg`; returns true when `temp` was replaced by a fresh temp, false when nothing had to be done.
-        const val, const ty: Type = val_ty: switch (temp.unwrap(cg)) { // pick the source value and its type according to what kind of temp this is
-            .ref => |ref| .{ temp.tracking(cg).short, cg.typeOf(ref) },
-            .temp => |temp_index| {
-                const temp_tracking = temp_index.tracking(cg);
-                if (temp_tracking.short == .register and
-                    temp_tracking.short.register == new_reg) return false; // only the `.temp` case short-circuits; `.ref` and `.err_ret_trace` are always copied
-                break :val_ty .{ temp_tracking.short, temp_index.typeOf(cg) };
-            },
-            .err_ret_trace => .{ temp.tracking(cg).short, .usize },
-        };
-        const new_temp_index = cg.next_temp_index;
-        try cg.register_manager.getReg(new_reg, new_temp_index.toIndex());
-        cg.temp_type[@intFromEnum(new_temp_index)] = ty;
-        try cg.genSetReg(new_reg, ty, val, .{});
-        new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
-        try temp.die(cg);
-        cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); // the new slot is committed only after the old temp has died
-        temp.* = .{ .index = new_temp_index.toIndex() };
-        return true;
-    }
-
-    fn toRegClass(temp: *Temp, mut: bool, rc: Register.Class, cg: *CodeGen) !bool { // Ensures the value sits in a register of class `rc`; returns true when `temp` was replaced by a newly allocated temp.
-        const val = temp.tracking(cg).short;
-        if (!mut or temp.isMut(cg)) switch (val) { // the current location may be kept only if the caller does not need mutability or the temp reports itself mutable
-            else => {},
-            .register => |reg| if (reg.class() == rc) return false,
-            .register_offset => |reg_off| if (reg_off.reg.class() == rc and reg_off.off == 0) return false, // a zero offset is accepted like a plain register
-        };
-        const ty = temp.typeOf(cg);
-        const new_temp_index = cg.next_temp_index;
-        cg.temp_type[@intFromEnum(new_temp_index)] = ty;
-        const new_reg = try cg.register_manager.allocReg(new_temp_index.toIndex(), regSetForRegClass(rc));
-        try cg.genSetReg(new_reg, ty, val, .{});
-        new_temp_index.tracking(cg).* = .init(.{ .register = new_reg });
-        try temp.die(cg);
-        cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
-        temp.* = .{ .index = new_temp_index.toIndex() };
-        return true;
-    }
-
-    fn toPair(first_temp: *Temp, second_temp: *Temp, cg: *CodeGen) !void { // Fuses two temps into one temp tracked as a `.register_pair`; on return both arguments refer to that combined temp.
-        while (true) for ([_]*Temp{ first_temp, second_temp }) |part_temp| { // repeat until a full pass over both temps moves neither of them
-            if (try part_temp.toRegClass(true, .general_purpose, cg)) break; // something moved: restart the pass from the first temp
-        } else break; // the `for` finished without `break`, so both are settled in general purpose registers
-        const first_temp_tracking = first_temp.unwrap(cg).temp.tracking(cg);
-        const second_temp_tracking = second_temp.unwrap(cg).temp.tracking(cg);
-        const result: MCValue = .{ .register_pair = .{
-            first_temp_tracking.short.register,
-            second_temp_tracking.short.register,
-        } };
-        const result_temp_index = cg.next_temp_index;
-        const result_temp: Temp = .{ .index = result_temp_index.toIndex() };
-        assert(cg.reuseTemp(result_temp.index, first_temp.index, first_temp_tracking));
-        assert(cg.reuseTemp(result_temp.index, second_temp.index, second_temp_tracking));
-        cg.temp_type[@intFromEnum(result_temp_index)] = .slice_const_u8; // the pair is always typed as `[]const u8`, whatever the two inputs were
-        result_temp_index.tracking(cg).* = .init(result);
-        first_temp.* = result_temp;
-        second_temp.* = result_temp;
-    }
-
-    fn asMask(temp: Temp, info: MaskInfo, cg: *CodeGen) void { // Retags the temp's tracked `.register` value as a `.register_mask` on the same register, carrying `info`.
-        assert(info.scalar != .none);
-        const mcv = &temp.unwrap(cg).temp.tracking(cg).short; // pointer into the tracking entry, so the assignment below updates it in place
-        const reg = mcv.register; // reads the `.register` payload, so the value must currently be tracked as a plain register
-        mcv.* = .{ .register_mask = .{ .reg = reg, .info = info } };
-    }
-
-    fn toLea(temp: *Temp, cg: *CodeGen) !bool { // Reworks a pointer-valued temp depending on where it is tracked; returns true when the temp changed. Callers in this file invoke it in a loop until it returns false.
-        switch (temp.tracking(cg).short) {
-            .none,
-            .unreach,
-            .dead,
-            .undef,
-            .eflags,
-            .register_pair,
-            .register_triple,
-            .register_quadruple,
-            .register_overflow,
-            .register_mask,
-            .elementwise_regs_then_frame,
-            .reserved_frame,
-            .air_ref,
-            => unreachable, // not a valid pointer
-            .immediate,
-            .register,
-            .register_offset,
-            .lea_direct,
-            .lea_got,
-            .lea_tlv,
-            .lea_frame,
-            => return false, // these locations are left exactly as they are
-            .memory,
-            .indirect,
-            .load_symbol,
-            .load_direct,
-            .load_got,
-            .load_tlv,
-            .load_frame,
-            => return temp.toRegClass(true, .general_purpose, cg), // the pointer value itself lives in memory: bring it into a general purpose register
-            .lea_symbol => |sym_off| {
-                const off = sym_off.off;
-                if (off == 0) return false; // a symbol address without an offset is left alone
-                try temp.toOffset(-off, cg); // cancel the offset first
-                while (try temp.toRegClass(true, .general_purpose, cg)) {}
-                try temp.toOffset(off, cg); // then apply the same offset again after the move into a register
-                return true;
-            },
-        }
-    }
-
-    fn toMemory(temp: *Temp, cg: *CodeGen) !bool { // Ensures the value is memory-resident by copying it into a new frame slot; returns false when `isMemory()` already holds.
-        const temp_tracking = temp.tracking(cg);
-        if (temp_tracking.short.isMemory()) return false;
-        const new_temp_index = cg.next_temp_index;
-        const ty = temp.typeOf(cg);
-        cg.temp_type[@intFromEnum(new_temp_index)] = ty;
-        const new_frame_index = try cg.allocFrameIndex(.initSpill(ty, cg.pt.zcu)); // frame slot created via `initSpill` for this type
-        try cg.genSetMem(.{ .frame = new_frame_index }, 0, ty, temp_tracking.short, .{}); // copy the current value to offset 0 of that slot
-        new_temp_index.tracking(cg).* = .init(.{ .load_frame = .{ .index = new_frame_index } });
-        try temp.die(cg);
-        cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
-        temp.* = .{ .index = new_temp_index.toIndex() };
-        return true;
-    }
-
-    // hack around linker relocation bugs
-    fn toBase(temp: *Temp, cg: *CodeGen) !bool { // Makes the value satisfy `isBase()`, spilling it or addressing it through a register; returns true when the temp changed.
-        const temp_tracking = temp.tracking(cg);
-        if (temp_tracking.short.isBase()) return false;
-        if (try temp.toMemory(cg)) return true; // the value was not in memory and has just been copied to a frame slot; report the change
-        const new_temp_index = cg.next_temp_index; // reached only when the value is memory-resident but not a base
-        cg.temp_type[@intFromEnum(new_temp_index)] = temp.typeOf(cg);
-        const new_reg =
-            try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp);
-        try cg.genSetReg(new_reg, .usize, temp_tracking.short.address(), .{}); // load the address of the value into the new register
-        new_temp_index.tracking(cg).* = .init(.{ .indirect = .{ .reg = new_reg } }); // the value is now tracked as an indirect access through that register
-        try temp.die(cg);
-        cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1);
-        temp.* = .{ .index = new_temp_index.toIndex() };
-        return true;
-    }
-
-    const AccessOptions = struct { // Options taken by the `load`, `store` and `read` helpers below.
-        disp: i32 = 0, // displacement: `load`/`store` apply it to the pointer via `toOffset`, `read` hands it to `readReg`
-        safe: bool = false, // when set, `store` of an `.undef` value fills the destination with the byte 0xaa; otherwise such a store emits nothing
-    };
-
-    fn load(ptr: *Temp, val_ty: Type, opts: AccessOptions, cg: *CodeGen) !Temp { // Loads a `val_ty` value through `ptr` (displaced by `opts.disp`) into a newly allocated temp and returns it; `ptr` itself is modified along the way.
-        const val = try cg.tempAlloc(val_ty);
-        try ptr.toOffset(opts.disp, cg);
-        while (try ptr.toLea(cg)) {} // repeat until `toLea` reports no further change
-        const val_mcv = val.tracking(cg).short;
-        switch (val_mcv) { // dispatch on where `tempAlloc` placed the result
-            else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
-            .register => |val_reg| try ptr.loadReg(val_ty, registerAlias(
-                val_reg,
-                @intCast(val_ty.abiSize(cg.pt.zcu)),
-            ), cg),
-            inline .register_pair,
-            .register_triple,
-            .register_quadruple,
-            => |val_regs| for (val_regs) |val_reg| { // one load per register of the tuple
-                try ptr.loadReg(val_ty, val_reg, cg);
-                try ptr.toOffset(@divExact(val_reg.bitSize(), 8), cg); // advance the pointer by the byte size of the register just loaded
-                while (try ptr.toLea(cg)) {}
-            },
-            .register_offset => |val_reg_off| switch (val_reg_off.off) {
-                0 => try ptr.loadReg(val_ty, registerAlias(
-                    val_reg_off.reg,
-                    @intCast(val_ty.abiSize(cg.pt.zcu)),
-                ), cg),
-                else => unreachable,
-            },
-            .memory, .indirect, .load_frame, .load_symbol => { // memory-resident result: copy `abiSize` bytes from `ptr` to the result's address
-                var val_ptr = try cg.tempInit(.usize, val_mcv.address());
-                var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
-                try val_ptr.memcpy(ptr, &len, cg);
-                try val_ptr.die(cg);
-                try len.die(cg);
-            },
-        }
-        return val;
-    }
-
-    fn store(ptr: *Temp, val: *Temp, opts: AccessOptions, cg: *CodeGen) !void { // Stores `val` through `ptr` (displaced by `opts.disp`), choosing the instruction sequence from where `val` is tracked; both temps may be modified.
-        const val_ty = val.typeOf(cg);
-        try ptr.toOffset(opts.disp, cg);
-        while (try ptr.toLea(cg)) {}
-        val_to_gpr: while (true) : (while (try ptr.toLea(cg) or // continue expression: runs only on `continue :val_to_gpr`, moving `val` into a general purpose register before the switch is retried
-            try val.toRegClass(false, .general_purpose, cg))
-        {}) {
-            const val_mcv = val.tracking(cg).short;
-            switch (val_mcv) {
-                else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
-                .undef => if (opts.safe) { // without `opts.safe`, storing an undefined value emits nothing
-                    var pat = try cg.tempInit(.u8, .{ .immediate = 0xaa });
-                    var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
-                    try ptr.memset(&pat, &len, cg);
-                    try pat.die(cg);
-                    try len.die(cg);
-                },
-                .immediate => |val_imm| {
-                    const val_op: Immediate = if (std.math.cast(u31, val_imm)) |val_uimm31| // fits in 31 unsigned bits: use an unsigned immediate
-                        .u(val_uimm31)
-                    else if (std.math.cast(i32, @as(i64, @bitCast(val_imm)))) |val_simm32| // reinterpreted as signed, fits in 32 bits: use a signed immediate
-                        .s(val_simm32)
-                    else
-                        continue :val_to_gpr; // otherwise retry with the value in a register
-                    // hack around linker relocation bugs
-                    switch (ptr.tracking(cg).short) {
-                        else => {},
-                        .lea_symbol => while (try ptr.toRegClass(false, .general_purpose, cg)) {},
-                    }
-                    try cg.asmMemoryImmediate(
-                        .{ ._, .mov },
-                        try ptr.tracking(cg).short.deref().mem(cg, .{
-                            .size = cg.memSize(val_ty),
-                        }),
-                        val_op,
-                    );
-                },
-                .eflags => |cc| { // a condition held in the flags is written as a single byte via setcc
-                    // hack around linker relocation bugs
-                    switch (ptr.tracking(cg).short) {
-                        else => {},
-                        .lea_symbol => while (try ptr.toRegClass(false, .general_purpose, cg)) {},
-                    }
-                    try cg.asmSetccMemory(
-                        cc,
-                        try ptr.tracking(cg).short.deref().mem(cg, .{ .size = .byte }),
-                    );
-                },
-                .register => |val_reg| try ptr.storeRegs(val_ty, &.{registerAlias(
-                    val_reg,
-                    @intCast(val_ty.abiSize(cg.pt.zcu)),
-                )}, cg),
-                inline .register_pair,
-                .register_triple,
-                .register_quadruple,
-                => |val_regs| try ptr.storeRegs(val_ty, &val_regs, cg),
-                .register_offset => |val_reg_off| switch (val_reg_off.off) {
-                    0 => try ptr.storeRegs(val_ty, &.{registerAlias(
-                        val_reg_off.reg,
-                        @intCast(val_ty.abiSize(cg.pt.zcu)),
-                    )}, cg),
-                    else => continue :val_to_gpr, // a non-zero offset is resolved by retrying with the value moved into a register
-                },
-                .register_overflow => |val_reg_ov| { // the first part is in a register and the trailing flag is still in eflags
-                    const ip = &cg.pt.zcu.intern_pool;
-                    const first_ty: Type = .fromInterned(first_ty: switch (ip.indexToKey(val_ty.toIntern())) {
-                        .tuple_type => |tuple_type| {
-                            const tuple_field_types = tuple_type.types.get(ip);
-                            assert(tuple_field_types.len == 2 and tuple_field_types[1] == .u1_type); // only two-field tuples whose second field is `u1` are expected
-                            break :first_ty tuple_field_types[0];
-                        },
-                        .opt_type => |opt_child| {
-                            assert(!val_ty.optionalReprIsPayload(cg.pt.zcu));
-                            break :first_ty opt_child;
-                        },
-                        else => std.debug.panic("{s}: {}\n", .{ @src().fn_name, val_ty.fmt(cg.pt) }),
-                    });
-                    const first_size: u31 = @intCast(first_ty.abiSize(cg.pt.zcu));
-                    try ptr.storeRegs(first_ty, &.{registerAlias(val_reg_ov.reg, first_size)}, cg);
-                    try ptr.toOffset(first_size, cg); // step past the first part; the flag byte is written directly after it
-                    try cg.asmSetccMemory(
-                        val_reg_ov.eflags,
-                        try ptr.tracking(cg).short.deref().mem(cg, .{ .size = .byte }),
-                    );
-                },
-                .lea_frame, .lea_symbol => continue :val_to_gpr, // address values are moved into a register first, then stored on the retry
-                .memory, .indirect, .load_frame, .load_symbol => { // memory-resident value: copy `abiSize` bytes from its address to `ptr`
-                    var val_ptr = try cg.tempInit(.usize, val_mcv.address());
-                    var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
-                    try ptr.memcpy(&val_ptr, &len, cg);
-                    try val_ptr.die(cg);
-                    try len.die(cg);
-                },
-            }
-            break; // every case that does not `continue :val_to_gpr` has emitted its store
-        }
-    }
-
-    fn read(src: *Temp, val_ty: Type, opts: AccessOptions, cg: *CodeGen) !Temp {
-        var val = try cg.tempAlloc(val_ty);
-        while (try src.toBase(cg)) {}
-        const val_mcv = val.tracking(cg).short;
-        switch (val_mcv) {
-            else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
-            .register => |val_reg| try src.readReg(opts.disp, val_ty, registerAlias(
-                val_reg,
-                @intCast(val_ty.abiSize(cg.pt.zcu)),
-            ), cg),
-            inline .register_pair, .register_triple, .register_quadruple => |val_regs| {
-                var disp = opts.disp;
-                for (val_regs) |val_reg| {
-                    try src.readReg(disp, val_ty, val_reg, cg);
-                    disp += @divExact(val_reg.bitSize(), 8);
-                }
-            },
-            .register_offset => |val_reg_off| switch (val_reg_off.off) {
-                0 => try src.readReg(opts.disp, val_ty, registerAlias(
-                    val_reg_off.reg,
-                    @intCast(val_ty.abiSize(cg.pt.zcu)),
-                ), cg),
-                else => unreachable,
-            },
-            .memory, .indirect, .load_frame, .load_symbol => {
-                var val_ptr = try cg.tempInit(.usize, val_mcv.address());
-                var src_ptr =
-                    try cg.tempInit(.usize, src.tracking(cg).short.address().offset(opts.disp));
-                var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
-                try val_ptr.memcpy(&src_ptr, &len, cg);
-                try val_ptr.die(cg);
-                try src_ptr.die(cg);
-                try len.die(cg);
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_, .xor, .tmp0y, .src0y, .src1y, ._ },
+                        .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_ymm, .mem } },
+                        .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_ymm, .to_ymm } },
+                    },
+                    .extra_temps = .{
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, .v_pd, .xor, .tmp0y, .src0y, .src1y, ._ },
+                        .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .remainder_int = .{ .of = .yword, .is = .xword } },
+                        .{ .remainder_int = .{ .of = .yword, .is = .xword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqu, .mov, .tmp1y, .memiad(.src0y, .tmp0, .add_size, -16), ._, ._ },
+                        .{ ._, .vp_, .xor, .tmp1y, .tmp1y, .memiad(.src1y, .tmp0, .add_size, -16), ._ },
+                        .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp1x, .memad(.src0x, .add_size, -16), ._, ._ },
+                        .{ ._, .vp_, .xor, .tmp1x, .tmp1x, .memad(.src1x, .add_size, -16), ._ },
+                        .{ ._, .vp_, .@"test", .tmp1x, .tmp1x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .remainder_int = .{ .of = .yword, .is = .xword } },
+                        .{ .remainder_int = .{ .of = .yword, .is = .xword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_pd, .movu, .tmp1y, .memiad(.src0y, .tmp0, .add_size, -16), ._, ._ },
+                        .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .memiad(.src1y, .tmp0, .add_size, -16), ._ },
+                        .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                        .{ ._, .v_pd, .mova, .tmp1x, .memad(.src0x, .add_size, -16), ._, ._ },
+                        .{ ._, .v_pd, .xor, .tmp1x, .tmp1x, .memad(.src1x, .add_size, -16), ._ },
+                        .{ ._, .vp_, .@"test", .tmp1x, .tmp1x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .remainder_int = .{ .of = .yword, .is = .yword } },
+                        .{ .remainder_int = .{ .of = .yword, .is = .yword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqu, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_, .xor, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
+                        .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .remainder_int = .{ .of = .yword, .is = .yword } },
+                        .{ .remainder_int = .{ .of = .yword, .is = .yword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_pd, .movu, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
+                        .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .remainder_int = .{ .of = .xword, .is = .xword } },
+                        .{ .remainder_int = .{ .of = .xword, .is = .xword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_, .xor, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ },
+                        .{ ._, .vp_, .@"test", .tmp1x, .tmp1x, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .remainder_int = .{ .of = .xword, .is = .xword } },
+                        .{ .remainder_int = .{ .of = .xword, .is = .xword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_, .xor, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_, .@"test", .tmp1x, .tmp1x, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, .fast_imm16, null, null },
+                    .src_constraints = .{
+                        .{ .remainder_int = .{ .of = .xword, .is = .xword } },
+                        .{ .remainder_int = .{ .of = .xword, .is = .xword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_b, .cmpeq, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_b, .movmsk, .tmp2d, .tmp1x, ._, ._ },
+                        .{ ._, ._, .cmp, .tmp2w, .si(-1), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .remainder_int = .{ .of = .xword, .is = .xword } },
+                        .{ .remainder_int = .{ .of = .xword, .is = .xword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_b, .cmpeq, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_b, .movmsk, .tmp2d, .tmp1x, ._, ._ },
+                        .{ ._, ._, .xor, .tmp2d, .ui(0xffff), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse, .mmx, null, null },
+                    .src_constraints = .{
+                        .{ .remainder_int = .{ .of = .qword, .is = .qword } },
+                        .{ .remainder_int = .{ .of = .qword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .rc = .mmx } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._q, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_b, .cmpeq, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_b, .movmsk, .tmp2d, .tmp1q, ._, ._ },
+                        .{ ._, ._, .cmp, .tmp2b, .si(-1), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{
+                        .{ .remainder_int = .{ .of = .qword, .is = .qword } },
+                        .{ .remainder_int = .{ .of = .qword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .xor, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{
+                        .{ .remainder_int = .{ .of = .dword, .is = .dword } },
+                        .{ .remainder_int = .{ .of = .dword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .cc = .z }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .xor, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                } });
             },
         }
-        return val;
-    }
-
-    fn write(dst: *Temp, val: *Temp, opts: AccessOptions, cg: *CodeGen) !void {
-        const val_ty = val.typeOf(cg);
-        while (try dst.toBase(cg)) {}
-        val_to_gpr: while (true) : (while (try dst.toBase(cg) or
-            try val.toRegClass(false, .general_purpose, cg))
-        {}) {
-            const val_mcv = val.tracking(cg).short;
-            switch (val_mcv) {
-                else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }),
-                .undef => if (opts.safe) {
-                    var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address().offset(opts.disp));
-                    var pat = try cg.tempInit(.u8, .{ .immediate = 0xaa });
-                    var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
-                    try dst_ptr.memset(&pat, &len, cg);
-                    try dst_ptr.die(cg);
-                    try pat.die(cg);
-                    try len.die(cg);
-                },
-                .immediate => |val_imm| {
-                    const val_op: Immediate = if (std.math.cast(u31, val_imm)) |val_uimm31|
-                        .u(val_uimm31)
-                    else if (std.math.cast(i32, @as(i64, @bitCast(val_imm)))) |val_simm32|
-                        .s(val_simm32)
-                    else
-                        continue :val_to_gpr;
-                    try cg.asmMemoryImmediate(
-                        .{ ._, .mov },
-                        try dst.tracking(cg).short.mem(cg, .{
-                            .size = cg.memSize(val_ty),
-                            .disp = opts.disp,
-                        }),
-                        val_op,
-                    );
-                },
-                .eflags => |cc| try cg.asmSetccMemory(
-                    cc,
-                    try dst.tracking(cg).short.mem(cg, .{
-                        .size = .byte,
-                        .disp = opts.disp,
-                    }),
-                ),
-                .register => |val_reg| try dst.writeRegs(opts.disp, val_ty, &.{registerAlias(
-                    val_reg,
-                    @intCast(val_ty.abiSize(cg.pt.zcu)),
-                )}, cg),
-                inline .register_pair,
-                .register_triple,
-                .register_quadruple,
-                => |val_regs| try dst.writeRegs(opts.disp, val_ty, &val_regs, cg),
-                .register_offset => |val_reg_off| switch (val_reg_off.off) {
-                    0 => try dst.writeRegs(opts.disp, val_ty, &.{registerAlias(
-                        val_reg_off.reg,
-                        @intCast(val_ty.abiSize(cg.pt.zcu)),
-                    )}, cg),
-                    else => continue :val_to_gpr,
-                },
-                .register_overflow => |val_reg_ov| {
-                    const ip = &cg.pt.zcu.intern_pool;
-                    const first_ty: Type = .fromInterned(first_ty: switch (ip.indexToKey(val_ty.toIntern())) {
-                        .tuple_type => |tuple_type| {
-                            const tuple_field_types = tuple_type.types.get(ip);
-                            assert(tuple_field_types.len == 2 and tuple_field_types[1] == .u1_type);
-                            break :first_ty tuple_field_types[0];
-                        },
-                        .opt_type => |opt_child| {
-                            assert(!val_ty.optionalReprIsPayload(cg.pt.zcu));
-                            break :first_ty opt_child;
-                        },
-                        else => std.debug.panic("{s}: {}\n", .{ @src().fn_name, val_ty.fmt(cg.pt) }),
-                    });
-                    const first_size: u31 = @intCast(first_ty.abiSize(cg.pt.zcu));
-                    try dst.writeRegs(opts.disp, first_ty, &.{registerAlias(val_reg_ov.reg, first_size)}, cg);
-                    try cg.asmSetccMemory(
-                        val_reg_ov.eflags,
-                        try dst.tracking(cg).short.mem(cg, .{
-                            .size = .byte,
-                            .disp = opts.disp + first_size,
-                        }),
-                    );
-                },
-                .lea_frame, .lea_symbol => continue :val_to_gpr,
-                .memory, .indirect, .load_frame, .load_symbol => {
-                    var dst_ptr =
-                        try cg.tempInit(.usize, dst.tracking(cg).short.address().offset(opts.disp));
-                    var val_ptr = try cg.tempInit(.usize, val_mcv.address());
-                    var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) });
-                    try dst_ptr.memcpy(&val_ptr, &len, cg);
-                    try dst_ptr.die(cg);
-                    try val_ptr.die(cg);
-                    try len.die(cg);
-                },
-            }
-            break;
-        }
-    }
-
-    fn loadReg(ptr: *Temp, dst_ty: Type, dst_reg: Register, cg: *CodeGen) !void {
-        const dst_rc = dst_reg.class();
-        const strat = try cg.moveStrategy(dst_ty, dst_rc, false);
-        // hack around linker relocation bugs
-        switch (ptr.tracking(cg).short) {
-            else => {},
-            .lea_symbol => |sym_off| if (dst_rc != .general_purpose or sym_off.off != 0)
-                while (try ptr.toRegClass(false, .general_purpose, cg)) {},
-        }
-        try strat.read(cg, dst_reg, try ptr.tracking(cg).short.deref().mem(cg, .{
-            .size = .fromBitSize(@min(8 * dst_ty.abiSize(cg.pt.zcu), dst_reg.bitSize())),
-        }));
-    }
-
-    fn storeRegs(ptr: *Temp, src_ty: Type, src_regs: []const Register, cg: *CodeGen) !void {
-        var part_disp: u31 = 0;
-        var deferred_disp: u31 = 0;
-        var src_abi_size: u32 = @intCast(src_ty.abiSize(cg.pt.zcu));
-        for (src_regs) |src_reg| {
-            const src_rc = src_reg.class();
-            const part_bit_size = @min(8 * src_abi_size, src_reg.bitSize());
-            const part_size = @divExact(part_bit_size, 8);
-            if (src_rc == .x87 or std.math.isPowerOfTwo(part_size)) {
-                // hack around linker relocation bugs
-                switch (ptr.tracking(cg).short) {
-                    else => {},
-                    .lea_symbol => while (try ptr.toRegClass(false, .general_purpose, cg)) {},
-                }
-                const strat = try cg.moveStrategy(src_ty, src_rc, false);
-                try strat.write(cg, try ptr.tracking(cg).short.deref().mem(cg, .{
-                    .size = .fromBitSize(part_bit_size),
-                    .disp = part_disp,
-                }), registerAlias(src_reg, part_size));
-            } else {
-                const frame_size = std.math.ceilPowerOfTwoAssert(u32, part_size);
-                const frame_index = try cg.allocFrameIndex(.init(.{
-                    .size = frame_size,
-                    .alignment = .fromNonzeroByteUnits(frame_size),
-                }));
-                const strat = try cg.moveStrategy(src_ty, src_rc, true);
-                try strat.write(cg, .{
-                    .base = .{ .frame = frame_index },
-                    .mod = .{ .rm = .{ .size = .fromSize(frame_size) } },
-                }, registerAlias(src_reg, frame_size));
-                try ptr.toOffset(deferred_disp, cg);
-                deferred_disp = 0;
-                var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } });
-                var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size });
-                try ptr.memcpy(&src_ptr, &len, cg);
-                try src_ptr.die(cg);
-                try len.die(cg);
-            }
-            part_disp += part_size;
-            deferred_disp += part_size;
-            src_abi_size -= part_size;
-        }
-    }
-
-    fn readReg(src: Temp, disp: i32, dst_ty: Type, dst_reg: Register, cg: *CodeGen) !void {
-        const strat = try cg.moveStrategy(dst_ty, dst_reg.class(), false);
-        try strat.read(cg, dst_reg, try src.tracking(cg).short.mem(cg, .{
-            .size = .fromBitSize(@min(8 * dst_ty.abiSize(cg.pt.zcu), dst_reg.bitSize())),
-            .disp = disp,
-        }));
-    }
-
-    fn writeRegs(dst: Temp, disp: i32, src_ty: Type, src_regs: []const Register, cg: *CodeGen) !void {
-        var part_disp = disp;
-        var src_abi_size: u32 = @intCast(src_ty.abiSize(cg.pt.zcu));
-        for (src_regs) |src_reg| {
-            const src_rc = src_reg.class();
-            const part_bit_size = @min(8 * src_abi_size, src_reg.bitSize());
-            const part_size = @divExact(part_bit_size, 8);
-            if (src_rc == .x87 or std.math.isPowerOfTwo(part_size)) {
-                const strat = try cg.moveStrategy(src_ty, src_rc, false);
-                try strat.write(cg, try dst.tracking(cg).short.mem(cg, .{
-                    .size = .fromBitSize(part_bit_size),
-                    .disp = part_disp,
-                }), registerAlias(src_reg, part_size));
-            } else {
-                const frame_size = std.math.ceilPowerOfTwoAssert(u32, part_size);
-                const frame_index = try cg.allocFrameIndex(.init(.{
-                    .size = frame_size,
-                    .alignment = .fromNonzeroByteUnits(frame_size),
-                }));
-                const strat = try cg.moveStrategy(src_ty, src_rc, true);
-                try strat.write(cg, .{
-                    .base = .{ .frame = frame_index },
-                    .mod = .{ .rm = .{ .size = .fromSize(frame_size) } },
-                }, registerAlias(src_reg, frame_size));
-                var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address());
-                try dst_ptr.toOffset(part_disp, cg);
-                var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } });
-                var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size });
-                try dst_ptr.memcpy(&src_ptr, &len, cg);
-                try dst_ptr.die(cg);
-                try src_ptr.die(cg);
-                try len.die(cg);
-            }
-            part_disp += part_size;
-            src_abi_size -= part_size;
+        if (switch (op) {
+            .lt, .gt, .eq => false,
+            .lte, .gte, .neq => true,
+        }) {
+            const cc = &res[0].unwrap(cg).temp.tracking(cg).short.eflags;
+            cc.* = cc.negate();
         }
-    }
-
-    fn memcpy(dst: *Temp, src: *Temp, len: *Temp, cg: *CodeGen) !void {
-        while (true) for ([_]*Temp{ dst, src, len }, [_]Register{ .rdi, .rsi, .rcx }) |temp, reg| {
-            if (try temp.toReg(reg, cg)) break;
-        } else break;
-        try cg.asmOpOnly(.{ .@"rep _sb", .mov });
-    }
-
-    fn memset(dst: *Temp, val: *Temp, len: *Temp, cg: *CodeGen) !void {
-        while (true) for ([_]*Temp{ dst, val, len }, [_]Register{ .rdi, .rax, .rcx }) |temp, reg| {
-            if (try temp.toReg(reg, cg)) break;
-        } else break;
-        try cg.asmOpOnly(.{ .@"rep _sb", .sto });
+        lhs.*, rhs.* = ops;
+        return res[0];
     }
 
     fn finish(
@@ -41790,7 +42381,7 @@ const Temp = struct {
         }
     }
 
-    const Index = enum(u4) {
+    const Index = enum(u5) {
         _,
 
         fn toIndex(index: Index) Air.Inst.Index {
@@ -41837,12 +42428,20 @@ const Temp = struct {
     };
 };
 
-fn resetTemps(cg: *CodeGen) void {
+fn resetTemps(cg: *CodeGen) !void {
+    var any_valid = false; // set when at least one temp below is still valid
     for (0..@intFromEnum(cg.next_temp_index)) |temp_index| {
         const temp: Temp.Index = @enumFromInt(temp_index);
-        assert(!temp.isValid(cg));
+        if (temp.isValid(cg)) { // previously an assert; now logged and reported as an error
+            any_valid = true;
+            tracking_log.err("failed to kill {}: {}", .{
+                temp.toIndex(),
+                cg.temp_type[temp_index].fmt(cg.pt),
+            });
+        }
         cg.temp_type[temp_index] = undefined;
     }
+    if (any_valid) return cg.fail("failed to kill all temps", .{}); // fail once, after every still-valid temp has been logged
     cg.next_temp_index = @enumFromInt(0);
 }
 
@@ -42228,6 +42827,8 @@ const Select = struct {
         signed_po2_int: Memory.Size,
         unsigned_po2_or_exact_int: Memory.Size,
         remainder_int: OfIsSizes,
+        signed_remainder_int: OfIsSizes,
+        unsigned_remainder_int: OfIsSizes,
         exact_remainder_int: OfIsSizes,
         signed_or_exact_remainder_int: OfIsSizes,
         unsigned_or_exact_remainder_int: OfIsSizes,
@@ -42342,6 +42943,10 @@ const Select = struct {
                     of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1
                 else
                     false,
+                .signed_remainder_int => |of_is| if (cg.intInfo(ty)) |int_info| int_info.signedness == .signed and
+                    of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1 else false,
+                .unsigned_remainder_int => |of_is| if (cg.intInfo(ty)) |int_info| int_info.signedness == .unsigned and
+                    of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1 else false,
                 .exact_remainder_int => |of_is| if (cg.intInfo(ty)) |int_info|
                     of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1
                 else
@@ -42749,6 +43354,7 @@ const Select = struct {
             const add_size: Adjust = .{ .sign = .pos, .lhs = .size, .op = .mul, .rhs = .@"1" };
             const add_size_div_8: Adjust = .{ .sign = .pos, .lhs = .size, .op = .div, .rhs = .@"8" };
             const sub_size_div_8: Adjust = .{ .sign = .neg, .lhs = .size, .op = .div, .rhs = .@"8" };
+            const sub_size_div_4: Adjust = .{ .sign = .neg, .lhs = .size, .op = .div, .rhs = .@"4" };
             const sub_size: Adjust = .{ .sign = .neg, .lhs = .size, .op = .mul, .rhs = .@"1" };
             const add_size_sub_elem_size: Adjust = .{ .sign = .pos, .lhs = .size_sub_elem_size, .op = .mul, .rhs = .@"1" };
             const add_src0_unaligned_size: Adjust = .{ .sign = .pos, .lhs = .src0_unaligned_size, .op = .mul, .rhs = .@"1" };
test/behavior/x86_64/math.zig
@@ -17,7 +17,14 @@ const Sse = if (std.Target.x86.featureSetHas(builtin.cpu.features, .avx))
 else
     @Vector(16, u8);
 
-inline fn sign(rhs: anytype) switch (@typeInfo(@TypeOf(rhs))) {
+inline fn runtime(comptime Type: type, comptime value: Type) Type { // yields `value`, via one of the two paths below
+    if (@inComptime()) return value; // comptime evaluation: hand the value back unchanged
+    return struct {
+        var variable: Type = value; // container-level mutable variable initialized to `value`
+    }.variable; // otherwise read it back from that variable (presumably so the result is not comptime-known)
+}
+
+fn sign(rhs: anytype) switch (@typeInfo(@TypeOf(rhs))) {
     else => bool,
     .vector => |vector| @Vector(vector.len, bool),
 } {
@@ -39,7 +46,7 @@ inline fn sign(rhs: anytype) switch (@typeInfo(@TypeOf(rhs))) {
         },
     }
 }
-inline fn boolAnd(lhs: anytype, rhs: @TypeOf(lhs)) @TypeOf(lhs) {
+fn boolAnd(lhs: anytype, rhs: @TypeOf(lhs)) @TypeOf(lhs) {
     switch (@typeInfo(@TypeOf(lhs))) {
         .bool => return lhs and rhs,
         .vector => |vector| switch (vector.child) {
@@ -55,7 +62,7 @@ inline fn boolAnd(lhs: anytype, rhs: @TypeOf(lhs)) @TypeOf(lhs) {
     }
     @compileError("unsupported boolAnd type: " ++ @typeName(@TypeOf(lhs)));
 }
-inline fn boolOr(lhs: anytype, rhs: @TypeOf(lhs)) @TypeOf(lhs) {
+fn boolOr(lhs: anytype, rhs: @TypeOf(lhs)) @TypeOf(lhs) {
     switch (@typeInfo(@TypeOf(lhs))) {
         .bool => return lhs or rhs,
         .vector => |vector| switch (vector.child) {
@@ -186,6 +193,90 @@ fn unary(comptime op: anytype, comptime opts: struct { strict: bool = false }) t
             );
         }
         fn testIntTypes() !void {
+            try testArgs(i1, undefined);
+            try testArgs(u1, undefined);
+            try testArgs(i2, undefined);
+            try testArgs(u2, undefined);
+            try testArgs(i3, undefined);
+            try testArgs(u3, undefined);
+            try testArgs(i4, undefined);
+            try testArgs(u4, undefined);
+            try testArgs(i5, undefined);
+            try testArgs(u5, undefined);
+            try testArgs(i7, undefined);
+            try testArgs(u7, undefined);
+            try testArgs(i8, undefined);
+            try testArgs(u8, undefined);
+            try testArgs(i9, undefined);
+            try testArgs(u9, undefined);
+            try testArgs(i15, undefined);
+            try testArgs(u15, undefined);
+            try testArgs(i16, undefined);
+            try testArgs(u16, undefined);
+            try testArgs(i17, undefined);
+            try testArgs(u17, undefined);
+            try testArgs(i31, undefined);
+            try testArgs(u31, undefined);
+            try testArgs(i32, undefined);
+            try testArgs(u32, undefined);
+            try testArgs(i33, undefined);
+            try testArgs(u33, undefined);
+            try testArgs(i63, undefined);
+            try testArgs(u63, undefined);
+            try testArgs(i64, undefined);
+            try testArgs(u64, undefined);
+            try testArgs(i65, undefined);
+            try testArgs(u65, undefined);
+            try testArgs(i95, undefined);
+            try testArgs(u95, undefined);
+            try testArgs(i96, undefined);
+            try testArgs(u96, undefined);
+            try testArgs(i97, undefined);
+            try testArgs(u97, undefined);
+            try testArgs(i127, undefined);
+            try testArgs(u127, undefined);
+            try testArgs(i128, undefined);
+            try testArgs(u128, undefined);
+            try testArgs(i129, undefined);
+            try testArgs(u129, undefined);
+            try testArgs(i159, undefined);
+            try testArgs(u159, undefined);
+            try testArgs(i160, undefined);
+            try testArgs(u160, undefined);
+            try testArgs(i161, undefined);
+            try testArgs(u161, undefined);
+            try testArgs(i191, undefined);
+            try testArgs(u191, undefined);
+            try testArgs(i192, undefined);
+            try testArgs(u192, undefined);
+            try testArgs(i193, undefined);
+            try testArgs(u193, undefined);
+            try testArgs(i223, undefined);
+            try testArgs(u223, undefined);
+            try testArgs(i224, undefined);
+            try testArgs(u224, undefined);
+            try testArgs(i225, undefined);
+            try testArgs(u225, undefined);
+            try testArgs(i255, undefined);
+            try testArgs(u255, undefined);
+            try testArgs(i256, undefined);
+            try testArgs(u256, undefined);
+            try testArgs(i257, undefined);
+            try testArgs(u257, undefined);
+            try testArgs(i511, undefined);
+            try testArgs(u511, undefined);
+            try testArgs(i512, undefined);
+            try testArgs(u512, undefined);
+            try testArgs(i513, undefined);
+            try testArgs(u513, undefined);
+            try testArgs(i1023, undefined);
+            try testArgs(u1023, undefined);
+            try testArgs(i1024, undefined);
+            try testArgs(u1024, undefined);
+            try testArgs(i1025, undefined);
+            try testArgs(u1025, undefined);
+        }
+        fn testInts() !void {
             try testArgs(i1, -1);
             try testArgs(i1, 0);
             try testArgs(u1, 0);
@@ -549,6 +640,13 @@ fn unary(comptime op: anytype, comptime opts: struct { strict: bool = false }) t
             try testArgs(u1025, 1 << 1024);
         }
         fn testFloatTypes() !void {
+            try testArgs(f16, undefined);
+            try testArgs(f32, undefined);
+            try testArgs(f64, undefined);
+            try testArgs(f80, undefined);
+            try testArgs(f128, undefined);
+        }
+        fn testFloats() !void {
             try testArgs(f16, -nan(f16));
             try testArgs(f16, -inf(f16));
             try testArgs(f16, -fmax(f16));
@@ -645,6 +743,168 @@ fn unary(comptime op: anytype, comptime opts: struct { strict: bool = false }) t
             try testArgs(f128, nan(f128));
         }
         fn testIntVectorTypes() !void {
+            try testArgs(@Vector(3, i1), undefined);
+            try testArgs(@Vector(3, u1), undefined);
+            try testArgs(@Vector(3, i2), undefined);
+            try testArgs(@Vector(3, u2), undefined);
+            try testArgs(@Vector(3, i3), undefined);
+            try testArgs(@Vector(3, u3), undefined);
+            try testArgs(@Vector(3, i4), undefined);
+            try testArgs(@Vector(1, i4), undefined);
+            try testArgs(@Vector(2, i4), undefined);
+            try testArgs(@Vector(4, i4), undefined);
+            try testArgs(@Vector(8, i4), undefined);
+            try testArgs(@Vector(16, i4), undefined);
+            try testArgs(@Vector(32, i4), undefined);
+            try testArgs(@Vector(64, i4), undefined);
+            try testArgs(@Vector(128, i4), undefined);
+            try testArgs(@Vector(256, i4), undefined);
+            try testArgs(@Vector(3, u4), undefined);
+            try testArgs(@Vector(1, u4), undefined);
+            try testArgs(@Vector(2, u4), undefined);
+            try testArgs(@Vector(4, u4), undefined);
+            try testArgs(@Vector(8, u4), undefined);
+            try testArgs(@Vector(16, u4), undefined);
+            try testArgs(@Vector(32, u4), undefined);
+            try testArgs(@Vector(64, u4), undefined);
+            try testArgs(@Vector(128, u4), undefined);
+            try testArgs(@Vector(256, u4), undefined);
+            try testArgs(@Vector(3, i5), undefined);
+            try testArgs(@Vector(3, u5), undefined);
+            try testArgs(@Vector(3, i7), undefined);
+            try testArgs(@Vector(3, u7), undefined);
+            try testArgs(@Vector(3, i8), undefined);
+            try testArgs(@Vector(1, i8), undefined);
+            try testArgs(@Vector(2, i8), undefined);
+            try testArgs(@Vector(4, i8), undefined);
+            try testArgs(@Vector(8, i8), undefined);
+            try testArgs(@Vector(16, i8), undefined);
+            try testArgs(@Vector(32, i8), undefined);
+            try testArgs(@Vector(64, i8), undefined);
+            try testArgs(@Vector(128, i8), undefined);
+            try testArgs(@Vector(3, u8), undefined);
+            try testArgs(@Vector(1, u8), undefined);
+            try testArgs(@Vector(2, u8), undefined);
+            try testArgs(@Vector(4, u8), undefined);
+            try testArgs(@Vector(8, u8), undefined);
+            try testArgs(@Vector(16, u8), undefined);
+            try testArgs(@Vector(32, u8), undefined);
+            try testArgs(@Vector(64, u8), undefined);
+            try testArgs(@Vector(128, u8), undefined);
+            try testArgs(@Vector(3, i9), undefined);
+            try testArgs(@Vector(3, u9), undefined);
+            try testArgs(@Vector(3, i15), undefined);
+            try testArgs(@Vector(3, u15), undefined);
+            try testArgs(@Vector(3, i16), undefined);
+            try testArgs(@Vector(1, i16), undefined);
+            try testArgs(@Vector(2, i16), undefined);
+            try testArgs(@Vector(4, i16), undefined);
+            try testArgs(@Vector(8, i16), undefined);
+            try testArgs(@Vector(16, i16), undefined);
+            try testArgs(@Vector(32, i16), undefined);
+            try testArgs(@Vector(64, i16), undefined);
+            try testArgs(@Vector(3, u16), undefined);
+            try testArgs(@Vector(1, u16), undefined);
+            try testArgs(@Vector(2, u16), undefined);
+            try testArgs(@Vector(4, u16), undefined);
+            try testArgs(@Vector(8, u16), undefined);
+            try testArgs(@Vector(16, u16), undefined);
+            try testArgs(@Vector(32, u16), undefined);
+            try testArgs(@Vector(64, u16), undefined);
+            try testArgs(@Vector(3, i17), undefined);
+            try testArgs(@Vector(3, u17), undefined);
+            try testArgs(@Vector(3, i31), undefined);
+            try testArgs(@Vector(3, u31), undefined);
+            try testArgs(@Vector(3, i32), undefined);
+            try testArgs(@Vector(1, i32), undefined);
+            try testArgs(@Vector(2, i32), undefined);
+            try testArgs(@Vector(4, i32), undefined);
+            try testArgs(@Vector(8, i32), undefined);
+            try testArgs(@Vector(16, i32), undefined);
+            try testArgs(@Vector(32, i32), undefined);
+            try testArgs(@Vector(3, u32), undefined);
+            try testArgs(@Vector(1, u32), undefined);
+            try testArgs(@Vector(2, u32), undefined);
+            try testArgs(@Vector(4, u32), undefined);
+            try testArgs(@Vector(8, u32), undefined);
+            try testArgs(@Vector(16, u32), undefined);
+            try testArgs(@Vector(32, u32), undefined);
+            try testArgs(@Vector(3, i33), undefined);
+            try testArgs(@Vector(3, u33), undefined);
+            try testArgs(@Vector(3, i63), undefined);
+            try testArgs(@Vector(3, u63), undefined);
+            try testArgs(@Vector(3, i64), undefined);
+            try testArgs(@Vector(1, i64), undefined);
+            try testArgs(@Vector(2, i64), undefined);
+            try testArgs(@Vector(4, i64), undefined);
+            try testArgs(@Vector(8, i64), undefined);
+            try testArgs(@Vector(16, i64), undefined);
+            try testArgs(@Vector(3, u64), undefined);
+            try testArgs(@Vector(1, u64), undefined);
+            try testArgs(@Vector(2, u64), undefined);
+            try testArgs(@Vector(4, u64), undefined);
+            try testArgs(@Vector(8, u64), undefined);
+            try testArgs(@Vector(16, u64), undefined);
+            try testArgs(@Vector(3, i65), undefined);
+            try testArgs(@Vector(3, u65), undefined);
+            try testArgs(@Vector(3, i127), undefined);
+            try testArgs(@Vector(3, u127), undefined);
+            try testArgs(@Vector(3, i128), undefined);
+            try testArgs(@Vector(1, i128), undefined);
+            try testArgs(@Vector(2, i128), undefined);
+            try testArgs(@Vector(4, i128), undefined);
+            try testArgs(@Vector(8, i128), undefined);
+            try testArgs(@Vector(3, u128), undefined);
+            try testArgs(@Vector(1, u128), undefined);
+            try testArgs(@Vector(2, u128), undefined);
+            try testArgs(@Vector(4, u128), undefined);
+            try testArgs(@Vector(8, u128), undefined);
+            try testArgs(@Vector(3, i129), undefined);
+            try testArgs(@Vector(3, u129), undefined);
+            try testArgs(@Vector(3, i191), undefined);
+            try testArgs(@Vector(3, u191), undefined);
+            try testArgs(@Vector(3, i192), undefined);
+            try testArgs(@Vector(1, i192), undefined);
+            try testArgs(@Vector(2, i192), undefined);
+            try testArgs(@Vector(4, i192), undefined);
+            try testArgs(@Vector(3, u192), undefined);
+            try testArgs(@Vector(1, u192), undefined);
+            try testArgs(@Vector(2, u192), undefined);
+            try testArgs(@Vector(4, u192), undefined);
+            try testArgs(@Vector(3, i193), undefined);
+            try testArgs(@Vector(3, u193), undefined);
+            try testArgs(@Vector(3, i255), undefined);
+            try testArgs(@Vector(3, u255), undefined);
+            try testArgs(@Vector(3, i256), undefined);
+            try testArgs(@Vector(1, i256), undefined);
+            try testArgs(@Vector(2, i256), undefined);
+            try testArgs(@Vector(4, i256), undefined);
+            try testArgs(@Vector(3, u256), undefined);
+            try testArgs(@Vector(1, u256), undefined);
+            try testArgs(@Vector(2, u256), undefined);
+            try testArgs(@Vector(4, u256), undefined);
+            try testArgs(@Vector(3, i257), undefined);
+            try testArgs(@Vector(3, u257), undefined);
+            try testArgs(@Vector(3, i511), undefined);
+            try testArgs(@Vector(3, u511), undefined);
+            try testArgs(@Vector(3, i512), undefined);
+            try testArgs(@Vector(1, i512), undefined);
+            try testArgs(@Vector(2, i512), undefined);
+            try testArgs(@Vector(3, u512), undefined);
+            try testArgs(@Vector(1, u512), undefined);
+            try testArgs(@Vector(2, u512), undefined);
+            try testArgs(@Vector(3, i513), undefined);
+            try testArgs(@Vector(3, u513), undefined);
+            try testArgs(@Vector(3, i1023), undefined);
+            try testArgs(@Vector(3, u1023), undefined);
+            try testArgs(@Vector(3, i1024), undefined);
+            try testArgs(@Vector(1, i1024), undefined);
+            try testArgs(@Vector(3, u1024), undefined);
+            try testArgs(@Vector(1, u1024), undefined);
+            try testArgs(@Vector(3, i1025), undefined);
+            try testArgs(@Vector(3, u1025), undefined);
+        }
+        fn testIntVectors() !void {
             try testArgs(@Vector(3, i1), .{ -1 << 0, -1, 0 });
             try testArgs(@Vector(3, u1), .{ 0, 1, 1 << 0 });
 
@@ -1190,6 +1450,38 @@ fn unary(comptime op: anytype, comptime opts: struct { strict: bool = false }) t
             try testArgs(@Vector(3, u1025), .{ 0, 1, 1 << 1024 });
         }
         fn testFloatVectorTypes() !void {
+            try testArgs(@Vector(1, f16), undefined);
+            try testArgs(@Vector(2, f16), undefined);
+            try testArgs(@Vector(4, f16), undefined);
+            try testArgs(@Vector(8, f16), undefined);
+            try testArgs(@Vector(16, f16), undefined);
+            try testArgs(@Vector(32, f16), undefined);
+            try testArgs(@Vector(64, f16), undefined);
+
+            try testArgs(@Vector(1, f32), undefined);
+            try testArgs(@Vector(2, f32), undefined);
+            try testArgs(@Vector(4, f32), undefined);
+            try testArgs(@Vector(8, f32), undefined);
+            try testArgs(@Vector(16, f32), undefined);
+            try testArgs(@Vector(32, f32), undefined);
+
+            try testArgs(@Vector(1, f64), undefined);
+            try testArgs(@Vector(2, f64), undefined);
+            try testArgs(@Vector(4, f64), undefined);
+            try testArgs(@Vector(8, f64), undefined);
+            try testArgs(@Vector(16, f64), undefined);
+
+            try testArgs(@Vector(1, f80), undefined);
+            try testArgs(@Vector(2, f80), undefined);
+            try testArgs(@Vector(4, f80), undefined);
+            try testArgs(@Vector(8, f80), undefined);
+
+            try testArgs(@Vector(1, f128), undefined);
+            try testArgs(@Vector(2, f128), undefined);
+            try testArgs(@Vector(4, f128), undefined);
+            try testArgs(@Vector(8, f128), undefined);
+        }
+        fn testFloatVectors() !void {
             try testArgs(@Vector(1, f16), .{
                 -0x1.17cp-12,
             });
@@ -1396,7 +1688,7 @@ fn binary(comptime op: anytype, comptime opts: struct { strict: bool = false })
                 imm_rhs,
             );
         }
-        fn testIntTypes() !void {
+        fn testInts() !void {
             try testArgs(i8, 0x48, 0x6c);
             try testArgs(u8, 0xbb, 0x43);
             try testArgs(i16, -0x0fdf, 0x302e);
@@ -1407,48 +1699,16 @@ fn binary(comptime op: anytype, comptime opts: struct { strict: bool = false })
             try testArgs(u64, 0x71138bc6b4a38898, 0x1bc4043de9438c7b);
             try testArgs(i128, 0x76d428c46cdeaa2ac43de8abffb22f6d, 0x427f7545abe434a12544fdbe2a012889);
             try testArgs(u128, 0xe05fc132ef2cd8affee00a907f0a851f, 0x29f912a72cfc6a7c6973426a9636da9a);
-            try testArgs(
-                i256,
-                -0x53d4148cee74ea43477a65b3daa7b8fdadcbf4508e793f4af113b8d8da5a7eb6,
-                -0x30dcbaf7b9b7a3df033694e6795444d842fb0b8f79bc18b3ea8a6b7ccad3ea91,
-            );
-            try testArgs(
-                u256,
-                0xb7935f5c2f3b1ae7a422c0a7c446884294b7d5370bada307d2fe5a4c4284a999,
-                0x310e6e196ba4f143b8d285ca6addf7f3bb3344224aff221b27607a31e148be08,
-            );
-            try testArgs(
-                i258,
-                -0x0eee283365108dbeea0bec82f5147418d8ffe86f9eed00e414b4eccd65c21239a,
-                -0x122c730073fc29a24cd6e3e6263566879bc5325d8566b8db31fcb4a76f7ab95eb,
-            );
-            try testArgs(
-                u258,
-                0x186d5ddaab8cb8cb04e5b41e36f812e039d008baf49f12894c39e29a07796d800,
-                0x2072daba6ffad168826163eb136f6d28ca4360c8e7e5e41e29755e19e4753a4f5,
-            );
-            try testArgs(
-                i495,
-                0x2fe6bc5448c55ce18252e2c9d44777505dfe63ff249a8027a6626c7d8dd9893fd5731e51474727be556f757facb586a4e04bbc0148c6c7ad692302f46fbd,
-                -0x016a358821ef8240172f3a08e8830c06e6bcf2225f5f4d41ed42b44d249385f55cc594e1278ecac31c73faed890e5054af1a561483bb1bb6fb1f753514cf,
-            );
-            try testArgs(
-                u495,
-                0x6eaf4e252b3bf74b75bac59e0b43ca5326bad2a25b3fdb74a67ef132ac5e47d72eebc3316fb2351ee66c50dc5afb92a75cea9b0e35160652c7db39eeb158,
-                0x49fbed744a92b549d8c05bb3512c617d24dd824f3f69bdf3923bc326a75674b85f5b828d2566fab9c86f571d12c2a63c9164feb0d191d27905533d09622a,
-            );
-            try testArgs(
-                i512,
-                -0x3a6876ca92775286c6e1504a64a9b8d56985bebf4a1b66539d404e0e96f24b226f70c4bcff295fdc2043b82513b2052dc45fd78f7e9e80e5b3e101757289f054,
-                0x5080c516a819bd32a0a5f0976441bbfbcf89e77684f1f10eb326aeb28e1f8d593278cff60fc99b8ffc87d8696882c64728dd3c322b7142803f4341f85a03bc10,
-            );
-            try testArgs(
-                u512,
-                0xe5b1fedca3c77db765e517aabd05ffc524a3a8aff1784bbf67c45b894447ede32b65b9940e78173c591e56e078932d465f235aece7ad47b7f229df7ba8f12295,
-                0x8b4bb7c2969e3b121cc1082c442f8b4330f0a50058438fed56447175bb10178607ecfe425cb54dacc25ef26810f3e04681de1844f1aa8d029aca75d658634806,
-            );
+            try testArgs(i256, -0x53d4148cee74ea43477a65b3daa7b8fdadcbf4508e793f4af113b8d8da5a7eb6, -0x30dcbaf7b9b7a3df033694e6795444d842fb0b8f79bc18b3ea8a6b7ccad3ea91);
+            try testArgs(u256, 0xb7935f5c2f3b1ae7a422c0a7c446884294b7d5370bada307d2fe5a4c4284a999, 0x310e6e196ba4f143b8d285ca6addf7f3bb3344224aff221b27607a31e148be08);
+            try testArgs(i258, -0x0eee283365108dbeea0bec82f5147418d8ffe86f9eed00e414b4eccd65c21239a, -0x122c730073fc29a24cd6e3e6263566879bc5325d8566b8db31fcb4a76f7ab95eb);
+            try testArgs(u258, 0x186d5ddaab8cb8cb04e5b41e36f812e039d008baf49f12894c39e29a07796d800, 0x2072daba6ffad168826163eb136f6d28ca4360c8e7e5e41e29755e19e4753a4f5);
+            try testArgs(i495, 0x2fe6bc5448c55ce18252e2c9d44777505dfe63ff249a8027a6626c7d8dd9893fd5731e51474727be556f757facb586a4e04bbc0148c6c7ad692302f46fbd, -0x016a358821ef8240172f3a08e8830c06e6bcf2225f5f4d41ed42b44d249385f55cc594e1278ecac31c73faed890e5054af1a561483bb1bb6fb1f753514cf);
+            try testArgs(u495, 0x6eaf4e252b3bf74b75bac59e0b43ca5326bad2a25b3fdb74a67ef132ac5e47d72eebc3316fb2351ee66c50dc5afb92a75cea9b0e35160652c7db39eeb158, 0x49fbed744a92b549d8c05bb3512c617d24dd824f3f69bdf3923bc326a75674b85f5b828d2566fab9c86f571d12c2a63c9164feb0d191d27905533d09622a);
+            try testArgs(i512, -0x3a6876ca92775286c6e1504a64a9b8d56985bebf4a1b66539d404e0e96f24b226f70c4bcff295fdc2043b82513b2052dc45fd78f7e9e80e5b3e101757289f054, 0x5080c516a819bd32a0a5f0976441bbfbcf89e77684f1f10eb326aeb28e1f8d593278cff60fc99b8ffc87d8696882c64728dd3c322b7142803f4341f85a03bc10);
+            try testArgs(u512, 0xe5b1fedca3c77db765e517aabd05ffc524a3a8aff1784bbf67c45b894447ede32b65b9940e78173c591e56e078932d465f235aece7ad47b7f229df7ba8f12295, 0x8b4bb7c2969e3b121cc1082c442f8b4330f0a50058438fed56447175bb10178607ecfe425cb54dacc25ef26810f3e04681de1844f1aa8d029aca75d658634806);
         }
-        fn testFloatTypes() !void {
+        fn testFloats() !void {
             @setEvalBranchQuota(21_700);
 
             try testArgs(f16, -nan(f16), -nan(f16));
@@ -3161,7 +3421,7 @@ fn binary(comptime op: anytype, comptime opts: struct { strict: bool = false })
             try testArgs(f128, nan(f128), inf(f128));
             try testArgs(f128, nan(f128), nan(f128));
         }
-        fn testIntVectorTypes() !void {
+        fn testIntVectors() !void {
             try testArgs(@Vector(1, i8), .{
                 -0x54,
             }, .{
@@ -3781,7 +4041,7 @@ fn binary(comptime op: anytype, comptime opts: struct { strict: bool = false })
                 0xf1e3bbe031d59351770a7a501b6e969b2c00d144f17648db3f944b69dfeb7be72e5ff933a061eba4eaa422f8ca09e5a97d0b0dd740fd4076eba8c72d7a278523f399202dc2d043c4e0eb58a2bcd4066e2146e321810b1ee4d3afdddb4f026bcc7905ce17e033a7727b4e08f33b53c63d8c9f763fc6c31d0523eb38c30d5e40bc,
             });
         }
-        fn testFloatVectorTypes() !void {
+        fn testFloatVectors() !void {
             @setEvalBranchQuota(21_700);
 
             try testArgs(@Vector(1, f16), .{
@@ -4178,76 +4438,226 @@ inline fn bitNot(comptime Type: type, rhs: Type) @TypeOf(~rhs) {
     return ~rhs;
 }
 test bitNot {
-    const t = unary(bitNot, .{});
-    try t.testIntTypes();
-    try t.testIntVectorTypes();
+    const test_bit_not = unary(bitNot, .{});
+    try test_bit_not.testInts();
+    try test_bit_not.testIntVectors();
 }
 
 inline fn abs(comptime Type: type, rhs: Type) @TypeOf(@abs(rhs)) {
     return @abs(rhs);
 }
 test abs {
-    const t = unary(abs, .{ .strict = true });
-    try t.testIntTypes();
-    try t.testIntVectorTypes();
-    try t.testFloatTypes();
-    try t.testFloatVectorTypes();
+    const test_abs = unary(abs, .{ .strict = true });
+    try test_abs.testInts();
+    try test_abs.testIntVectors();
+    try test_abs.testFloats();
+    try test_abs.testFloatVectors();
 }
 
 inline fn clz(comptime Type: type, rhs: Type) @TypeOf(@clz(rhs)) {
     return @clz(rhs);
 }
 test clz {
-    const t = unary(clz, .{});
-    try t.testIntTypes();
-    try t.testIntVectorTypes();
+    const test_clz = unary(clz, .{});
+    try test_clz.testInts();
+    try test_clz.testIntVectors();
+}
+
+inline fn equal(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs == rhs) {
+    return lhs == rhs;
+}
+test equal {
+    const test_equal = binary(equal, .{});
+    try test_equal.testInts();
+    try test_equal.testFloats();
+}
+
+inline fn notEqual(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs != rhs) {
+    return lhs != rhs;
+}
+test notEqual {
+    const test_not_equal = binary(notEqual, .{});
+    try test_not_equal.testInts();
+    try test_not_equal.testFloats();
+}
+
+inline fn lessThan(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs < rhs) {
+    return lhs < rhs;
+}
+test lessThan {
+    const test_less_than = binary(lessThan, .{});
+    try test_less_than.testInts();
+    try test_less_than.testFloats();
+}
+
+inline fn lessThanOrEqual(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs <= rhs) {
+    return lhs <= rhs;
+}
+test lessThanOrEqual {
+    const test_less_than_or_equal = binary(lessThanOrEqual, .{});
+    try test_less_than_or_equal.testInts();
+    try test_less_than_or_equal.testFloats();
+}
+
+inline fn greaterThan(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs > rhs) {
+    return lhs > rhs;
+}
+test greaterThan {
+    const test_greater_than = binary(greaterThan, .{});
+    try test_greater_than.testInts();
+    try test_greater_than.testFloats();
+}
+
+inline fn greaterThanOrEqual(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs >= rhs) {
+    return lhs >= rhs;
+}
+test greaterThanOrEqual {
+    const test_greater_than_or_equal = binary(greaterThanOrEqual, .{});
+    try test_greater_than_or_equal.testInts();
+    try test_greater_than_or_equal.testFloats();
 }
 
 inline fn bitAnd(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs & rhs) {
     return lhs & rhs;
 }
 test bitAnd {
-    const t = binary(bitAnd, .{});
-    try t.testIntTypes();
-    try t.testIntVectorTypes();
+    const test_bit_and = binary(bitAnd, .{});
+    try test_bit_and.testInts();
+    try test_bit_and.testIntVectors();
 }
 
 inline fn bitOr(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs | rhs) {
     return lhs | rhs;
 }
 test bitOr {
-    const t = binary(bitOr, .{});
-    try t.testIntTypes();
-    try t.testIntVectorTypes();
+    const test_bit_or = binary(bitOr, .{});
+    try test_bit_or.testInts();
+    try test_bit_or.testIntVectors();
 }
 
 inline fn bitXor(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs ^ rhs) {
     return lhs ^ rhs;
 }
 test bitXor {
-    const t = binary(bitXor, .{});
-    try t.testIntTypes();
-    try t.testIntVectorTypes();
+    const test_bit_xor = binary(bitXor, .{});
+    try test_bit_xor.testInts();
+    try test_bit_xor.testIntVectors();
 }
 
 inline fn min(comptime Type: type, lhs: Type, rhs: Type) Type {
     return @min(lhs, rhs);
 }
 test min {
-    const t = binary(min, .{});
-    try t.testIntTypes();
-    try t.testIntVectorTypes();
-    try t.testFloatTypes();
-    try t.testFloatVectorTypes();
+    const test_min = binary(min, .{});
+    try test_min.testInts();
+    try test_min.testIntVectors();
+    try test_min.testFloats();
+    try test_min.testFloatVectors();
 }
 
 inline fn max(comptime Type: type, lhs: Type, rhs: Type) Type {
     return @max(lhs, rhs);
 }
 test max {
-    const t = binary(max, .{});
-    try t.testIntTypes();
-    try t.testIntVectorTypes();
-    try t.testFloatTypes();
-    try t.testFloatVectorTypes();
+    const test_max = binary(max, .{});
+    try test_max.testInts();
+    try test_max.testIntVectors();
+    try test_max.testFloats();
+    try test_max.testFloatVectors();
+}
+
+inline fn nullIsNull(comptime Type: type, _: Type) bool {
+    return runtime(?Type, null) == null;
+}
+test nullIsNull {
+    const test_null_is_null = unary(nullIsNull, .{});
+    try test_null_is_null.testIntTypes();
+    try test_null_is_null.testIntVectorTypes();
+    try test_null_is_null.testFloatTypes();
+    try test_null_is_null.testFloatVectorTypes();
+}
+
+inline fn nullIsNotNull(comptime Type: type, _: Type) bool {
+    return runtime(?Type, null) != null;
+}
+test nullIsNotNull {
+    const test_null_is_not_null = unary(nullIsNotNull, .{});
+    try test_null_is_not_null.testIntTypes();
+    try test_null_is_not_null.testIntVectorTypes();
+    try test_null_is_not_null.testFloatTypes();
+    try test_null_is_not_null.testFloatVectorTypes();
+}
+
+inline fn optionalIsNull(comptime Type: type, lhs: Type) bool {
+    return @as(?Type, lhs) == null;
+}
+test optionalIsNull {
+    const test_optional_is_null = unary(optionalIsNull, .{});
+    try test_optional_is_null.testInts();
+    try test_optional_is_null.testFloats();
+}
+
+inline fn optionalIsNotNull(comptime Type: type, lhs: Type) bool {
+    return @as(?Type, lhs) != null;
+}
+test optionalIsNotNull {
+    const test_optional_is_not_null = unary(optionalIsNotNull, .{}); // harness for `optionalIsNotNull`, built by `unary` (defined outside this chunk)
+    try test_optional_is_not_null.testInts(); // only the scalar int/float variants are run; no vector variants are invoked here
+    try test_optional_is_not_null.testFloats();
+}
+
+inline fn nullEqualNull(comptime Type: type, _: Type) bool { // the value argument is discarded; only `Type` is used
+    return runtime(?Type, null) == runtime(?Type, null); // optional-to-optional `==` with both operands null (each passed through `runtime`, defined elsewhere)
+}
+test nullEqualNull {
+    const test_null_equal_null = unary(nullEqualNull, .{}); // harness for `nullEqualNull`, built by `unary` (defined outside this chunk)
+    try test_null_equal_null.testIntTypes(); // NOTE(review): `*Types` variants are used here, presumably because the operand value is ignored — confirm
+    try test_null_equal_null.testFloatTypes();
+}
+
+inline fn nullNotEqualNull(comptime Type: type, _: Type) bool { // the value argument is discarded; only `Type` is used
+    return runtime(?Type, null) != runtime(?Type, null); // optional-to-optional `!=` with both operands null (each passed through `runtime`, defined elsewhere)
+}
+test nullNotEqualNull {
+    const test_null_not_equal_null = unary(nullNotEqualNull, .{}); // harness for `nullNotEqualNull`, built by `unary` (defined outside this chunk)
+    try test_null_not_equal_null.testIntTypes(); // NOTE(review): `*Types` variants are used here, presumably because the operand value is ignored — confirm
+    try test_null_not_equal_null.testFloatTypes();
+}
+
+inline fn optionalEqualNull(comptime Type: type, lhs: Type) bool {
+    return lhs == runtime(?Type, null); // compares the plain `Type` value `lhs` with a null `?Type` passed through `runtime` (defined elsewhere)
+}
+test optionalEqualNull {
+    const test_optional_equal_null = unary(optionalEqualNull, .{}); // harness for `optionalEqualNull`, built by `unary` (defined outside this chunk)
+    try test_optional_equal_null.testInts(); // only the scalar int/float variants are run; no vector variants are invoked here
+    try test_optional_equal_null.testFloats();
+}
+
+inline fn optionalNotEqualNull(comptime Type: type, lhs: Type) bool {
+    return lhs != runtime(?Type, null); // `!=` between the plain `Type` value `lhs` and a null `?Type` passed through `runtime` (defined elsewhere)
+}
+test optionalNotEqualNull {
+    const test_optional_not_equal_null = unary(optionalNotEqualNull, .{}); // must wrap `optionalNotEqualNull` so the `lhs != runtime(?Type, null)` form is what gets exercised
+    try test_optional_not_equal_null.testInts(); // only the scalar int/float variants are run; no vector variants are invoked here
+    try test_optional_not_equal_null.testFloats();
+}
+
+inline fn optionalsEqual(comptime Type: type, lhs: Type, rhs: Type) bool {
+    if (@inComptime()) return lhs == rhs; // workaround https://github.com/ziglang/zig/issues/22636
+    return @as(?Type, lhs) == rhs; // outside comptime: `lhs` is wrapped in `?Type` and compared with the plain `rhs`
+}
+test optionalsEqual {
+    const test_optionals_equal = binary(optionalsEqual, .{}); // harness for `optionalsEqual`, built by `binary` (defined outside this chunk)
+    try test_optionals_equal.testInts(); // only the scalar int/float variants are run; no vector variants are invoked here
+    try test_optionals_equal.testFloats();
+}
+
+inline fn optionalsNotEqual(comptime Type: type, lhs: Type, rhs: Type) bool {
+    if (@inComptime()) return lhs != rhs; // workaround https://github.com/ziglang/zig/issues/22636
+    return lhs != @as(?Type, rhs); // outside comptime: `rhs` is wrapped in `?Type` and the plain `lhs` is compared against it
+}
+test optionalsNotEqual {
+    const test_optionals_not_equal = binary(optionalsNotEqual, .{}); // harness for `optionalsNotEqual`, built by `binary` (defined outside this chunk)
+    try test_optionals_not_equal.testInts(); // only the scalar int/float variants are run; no vector variants are invoked here
+    try test_optionals_not_equal.testFloats();
 }