Commit b1fa89439a

Jacob Young <jacobly0@users.noreply.github.com>
2025-01-23 11:31:46
x86_64: rewrite float vector `@abs` and equality comparisons
1 parent ae3d95f
Changed files (13)
lib/std/zig/Zir.zig
@@ -2128,7 +2128,7 @@ pub const Inst = struct {
         ref_start_index = static_len,
         _,
 
-        pub const static_len = 70;
+        pub const static_len = 76;
 
         pub fn toRef(i: Index) Inst.Ref {
             return @enumFromInt(@intFromEnum(Index.ref_start_index) + @intFromEnum(i));
@@ -2211,6 +2211,12 @@ pub const Inst = struct {
         single_const_pointer_to_comptime_int_type,
         slice_const_u8_type,
         slice_const_u8_sentinel_0_type,
+        vector_4_f16_type,
+        vector_8_f16_type,
+        vector_4_f32_type,
+        vector_8_f32_type,
+        vector_2_f64_type,
+        vector_4_f64_type,
         optional_noreturn_type,
         anyerror_void_error_union_type,
         adhoc_inferred_error_set_type,
src/arch/x86_64/bits.zig
@@ -366,6 +366,7 @@ pub const Register = enum(u8) {
             @intFromEnum(Register.eax)  ... @intFromEnum(Register.r15d)  => @intFromEnum(Register.eax),
             @intFromEnum(Register.ax)   ... @intFromEnum(Register.r15w)  => @intFromEnum(Register.ax),
             @intFromEnum(Register.al)   ... @intFromEnum(Register.r15b)  => @intFromEnum(Register.al),
+            @intFromEnum(Register.ah)   ... @intFromEnum(Register.bh)    => @intFromEnum(Register.ah),
             else => unreachable,
             // zig fmt: on
         };
src/arch/x86_64/CodeGen.zig
@@ -2393,7 +2393,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
 }
 
 fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
-    @setEvalBranchQuota(1_800);
+    @setEvalBranchQuota(2_700);
     const pt = cg.pt;
     const zcu = pt.zcu;
     const ip = &zcu.intern_pool;
@@ -2984,6 +2984,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.mem},
                         .clobbers = .{ .eflags = true },
@@ -3008,6 +3011,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.mem},
                         .clobbers = .{ .eflags = true },
@@ -3032,6 +3038,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.mem},
                         .clobbers = .{ .eflags = true },
@@ -3056,6 +3065,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.mem},
                         .clobbers = .{ .eflags = true },
@@ -3080,6 +3092,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.mem},
                         .clobbers = .{ .eflags = true },
@@ -3104,6 +3119,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.mem},
                         .clobbers = .{ .eflags = true },
@@ -3127,6 +3145,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.mem},
                         .clobbers = .{ .eflags = true },
@@ -3265,6 +3286,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .ref = .src0 }},
                     .each = .{ .once = &.{
@@ -3296,6 +3320,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .sse }},
                     .each = .{ .once = &.{
@@ -3327,6 +3354,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .ref = .src0 }},
                     .each = .{ .once = &.{
@@ -3346,6 +3376,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .ref = .src0 }},
                     .each = .{ .once = &.{
@@ -3377,6 +3410,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .sse }},
                     .each = .{ .once = &.{
@@ -3392,7 +3428,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     },
                     .dst_temps = .{.{ .rc = .sse }},
                     .each = .{ .once = &.{
-                        .{ ._, .v_pd, .cmp, .dst0y, .dst0y, .dst0y, .si(0b01111) },
+                        .{ ._, .v_pd, .cmp, .dst0y, .dst0y, .dst0y, .ui(0b01111) },
                         .{ ._, .v_pd, .xor, .dst0y, .dst0y, .src0y, ._ },
                     } },
                 }, .{
@@ -3408,6 +3444,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .sse }},
                     .each = .{ .once = &.{
@@ -3427,6 +3466,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -3453,6 +3495,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -3477,12 +3522,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ },
-                        .{ ._, .v_pd, .cmp, .tmp1y, .tmp1y, .tmp1y, .si(0b01111) },
+                        .{ ._, .v_pd, .cmp, .tmp1y, .tmp1y, .tmp1y, .ui(0b01111) },
                         .{ .@"0:", .v_pd, .xor, .tmp2y, .tmp1y, .memiad(.src0y, .tmp0, .add_size, -16), ._ },
                         .{ ._, .v_pd, .movu, .memiad(.dst0y, .tmp0, .add_size, -16), .tmp2y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
@@ -3503,12 +3551,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ ._, .v_pd, .cmp, .tmp1y, .tmp1y, .tmp1y, .si(0b01111) },
+                        .{ ._, .v_pd, .cmp, .tmp1y, .tmp1y, .tmp1y, .ui(0b01111) },
                         .{ .@"0:", .v_pd, .xor, .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._ },
                         .{ ._, .v_pd, .movu, .memia(.dst0y, .tmp0, .add_size), .tmp2y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
@@ -3527,6 +3578,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -3551,6 +3605,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -3576,6 +3633,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .ref = .src0 }},
                     .clobbers = .{ .eflags = true },
@@ -3599,6 +3659,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -3623,6 +3686,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .ref = .src0 }},
                     .clobbers = .{ .eflags = true },
@@ -3647,6 +3713,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -3676,6 +3745,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .ref = .src0 }},
                     .clobbers = .{ .eflags = true },
@@ -3700,6 +3772,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -3725,6 +3800,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .ref = .src0 }},
                     .clobbers = .{ .eflags = true },
@@ -3748,6 +3826,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -3776,6 +3857,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .ref = .src0 }},
                     .clobbers = .{ .eflags = true },
@@ -3800,6 +3884,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -3829,6 +3916,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .ref = .src0 }},
                     .clobbers = .{ .eflags = true },
@@ -3852,6 +3942,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -3880,6 +3973,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .ref = .src0 }},
                     .clobbers = .{ .eflags = true },
@@ -3905,6 +4001,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -3933,6 +4032,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .ref = .src0 }},
                     .clobbers = .{ .eflags = true },
@@ -3957,6 +4059,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -3996,6 +4101,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .ref = .src0 }},
                     .each = .{ .once = &.{
@@ -4027,6 +4135,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .sse }},
                     .each = .{ .once = &.{
@@ -4058,6 +4169,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .ref = .src0 }},
                     .each = .{ .once = &.{
@@ -4077,6 +4191,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .ref = .src0 }},
                     .each = .{ .once = &.{
@@ -4108,6 +4225,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .sse }},
                     .each = .{ .once = &.{
@@ -4123,7 +4243,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     },
                     .dst_temps = .{.{ .rc = .sse }},
                     .each = .{ .once = &.{
-                        .{ ._, .v_pd, .cmp, .dst0y, .dst0y, .dst0y, .si(0b01111) },
+                        .{ ._, .v_pd, .cmp, .dst0y, .dst0y, .dst0y, .ui(0b01111) },
                         .{ ._, .v_pd, .xor, .dst0y, .dst0y, .src0y, ._ },
                     } },
                 }, .{
@@ -4139,6 +4259,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .sse }},
                     .each = .{ .once = &.{
@@ -4157,6 +4280,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .kind = .{ .umax_mem = .{ .ref = .src0 } } },
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .each = .{ .once = &.{
@@ -4179,6 +4305,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .kind = .{ .umax_mem = .{ .ref = .src0 } } },
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .each = .{ .once = &.{
@@ -4517,6 +4646,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -4541,6 +4673,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -4566,6 +4701,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -4592,6 +4730,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -4650,6 +4791,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -4676,6 +4820,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -4700,6 +4847,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -4722,6 +4872,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -4745,6 +4898,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -4769,6 +4925,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -4894,6 +5053,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -4917,6 +5079,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5041,6 +5206,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5064,6 +5232,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5101,6 +5272,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5156,6 +5330,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5211,6 +5388,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5236,6 +5416,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5258,6 +5441,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5285,6 +5471,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5311,6 +5500,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5339,6 +5531,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5366,6 +5561,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5393,6 +5591,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5419,6 +5620,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5447,6 +5651,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5474,6 +5681,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5504,6 +5714,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5533,6 +5746,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5562,6 +5778,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5592,6 +5811,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5621,6 +5843,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.{ .rc = .general_purpose }},
                     .clobbers = .{ .eflags = true },
@@ -5650,6 +5875,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -5676,6 +5904,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -5702,6 +5933,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -5728,6 +5962,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -5754,6 +5991,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -5780,6 +6020,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -5806,6 +6049,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -5832,6 +6078,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -5858,6 +6107,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -5887,6 +6139,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -5916,7 +6171,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
-                    },
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
@@ -5945,6 +6203,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -5974,6 +6235,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6001,6 +6265,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6029,6 +6296,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6058,6 +6328,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6087,6 +6360,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6116,6 +6392,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6145,6 +6424,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6172,6 +6454,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6200,6 +6485,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6229,6 +6517,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6258,6 +6549,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6287,6 +6581,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6316,6 +6613,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6343,6 +6643,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6371,6 +6674,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6400,6 +6706,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6429,6 +6738,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6458,6 +6770,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6487,6 +6802,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6515,6 +6833,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6544,6 +6865,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6581,6 +6905,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6617,6 +6944,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6653,6 +6983,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6690,6 +7023,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6726,6 +7062,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6762,6 +7101,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6799,6 +7141,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6835,6 +7180,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6871,6 +7219,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6908,6 +7259,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6944,6 +7298,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -6981,8 +7338,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
                 const extra = cg.air.extraData(Air.VectorCmp, ty_pl.payload).data;
                 switch (extra.compareOperator()) {
-                    .eq, .neq => if (cg.typeOf(extra.lhs).scalarType(zcu).isRuntimeFloat())
-                        break :fallback try cg.airCmpVector(inst),
+                    .eq, .neq => {},
                     else => break :fallback try cg.airCmpVector(inst),
                 }
                 var ops = try cg.tempsFromOperands(inst, .{ extra.lhs, extra.rhs });
@@ -7004,7 +7360,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
                                 .{ .src = .{ .to_ymm, .to_ymm } },
                             },
-                            .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
+                            .dst_temps = .{.{ .mut_rc_mask = .{ .ref = .src0, .rc = .sse, .info = .{
                                 .kind = .all,
                                 .inverted = switch (cc) {
                                     else => unreachable,
@@ -7024,7 +7380,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
                                 .{ .src = .{ .to_ymm, .to_ymm } },
                             },
-                            .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
+                            .dst_temps = .{.{ .mut_rc_mask = .{ .ref = .src0, .rc = .sse, .info = .{
                                 .kind = .all,
                                 .inverted = switch (cc) {
                                     else => unreachable,
@@ -7044,7 +7400,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
                                 .{ .src = .{ .to_ymm, .to_ymm } },
                             },
-                            .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
+                            .dst_temps = .{.{ .mut_rc_mask = .{ .ref = .src0, .rc = .sse, .info = .{
                                 .kind = .all,
                                 .inverted = switch (cc) {
                                     else => unreachable,
@@ -7064,7 +7420,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } },
                                 .{ .src = .{ .to_ymm, .to_ymm } },
                             },
-                            .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
+                            .dst_temps = .{.{ .mut_rc_mask = .{ .ref = .src0, .rc = .sse, .info = .{
                                 .kind = .all,
                                 .inverted = switch (cc) {
                                     else => unreachable,
@@ -7084,7 +7440,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
                                 .{ .src = .{ .to_xmm, .to_xmm } },
                             },
-                            .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
+                            .dst_temps = .{.{ .mut_rc_mask = .{ .ref = .src0, .rc = .sse, .info = .{
                                 .kind = .all,
                                 .inverted = switch (cc) {
                                     else => unreachable,
@@ -7104,7 +7460,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
                                 .{ .src = .{ .to_xmm, .to_xmm } },
                             },
-                            .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
+                            .dst_temps = .{.{ .mut_rc_mask = .{ .ref = .src0, .rc = .sse, .info = .{
                                 .kind = .all,
                                 .inverted = switch (cc) {
                                     else => unreachable,
@@ -7124,7 +7480,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
                                 .{ .src = .{ .to_xmm, .to_xmm } },
                             },
-                            .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
+                            .dst_temps = .{.{ .mut_rc_mask = .{ .ref = .src0, .rc = .sse, .info = .{
                                 .kind = .all,
                                 .inverted = switch (cc) {
                                     else => unreachable,
@@ -7144,7 +7500,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } },
                                 .{ .src = .{ .to_xmm, .to_xmm } },
                             },
-                            .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
+                            .dst_temps = .{.{ .mut_rc_mask = .{ .ref = .src0, .rc = .sse, .info = .{
                                 .kind = .all,
                                 .inverted = switch (cc) {
                                     else => unreachable,
@@ -7409,6 +7765,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .unused,
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -7445,6 +7804,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .kind = .{ .rc = .sse } },
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -7487,6 +7849,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .kind = .{ .rc = .sse } },
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -7531,6 +7896,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .kind = .{ .rc = .sse } },
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -7573,6 +7941,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                                 .{ .kind = .{ .rc = .sse } },
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -7641,6 +8012,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .kind = .{ .rc = .sse } },
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -7683,6 +8057,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .kind = .{ .rc = .sse } },
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -7727,6 +8104,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                                 .{ .kind = .{ .rc = .sse } },
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -7795,6 +8175,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                                 .{ .kind = .{ .rc = .sse } },
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -7863,6 +8246,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .kind = .{ .rc = .sse } },
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -7905,6 +8291,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .kind = .{ .rc = .sse } },
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -7949,6 +8338,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                                 .{ .kind = .{ .rc = .sse } },
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -8017,6 +8409,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                                 .{ .kind = .{ .rc = .sse } },
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -8085,6 +8480,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .kind = .{ .rc = .mmx } },
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -8127,6 +8525,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .kind = .{ .rc = .mmx } },
                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                                 .{ .kind = .{ .rc = .mmx } },
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -8199,6 +8600,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .kind = .{ .rc = .mmx } },
                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                                 .{ .kind = .{ .rc = .mmx } },
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -8273,6 +8677,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .unused,
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.{ .rc = .general_purpose }},
                             .clobbers = .{ .eflags = true },
@@ -8302,6 +8709,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .unused,
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.{ .rc = .general_purpose }},
                             .clobbers = .{ .eflags = true },
@@ -8331,6 +8741,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .unused,
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.{ .rc = .general_purpose }},
                             .clobbers = .{ .eflags = true },
@@ -8361,6 +8774,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .unused,
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.{ .rc = .general_purpose }},
                             .clobbers = .{ .eflags = true },
@@ -8390,6 +8806,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.{ .rc = .general_purpose }},
                             .clobbers = .{ .eflags = true },
@@ -8426,6 +8845,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.{ .rc = .general_purpose }},
                             .clobbers = .{ .eflags = true },
@@ -8456,6 +8878,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.{ .rc = .general_purpose }},
                             .clobbers = .{ .eflags = true },
@@ -8486,6 +8911,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.{ .rc = .general_purpose }},
                             .clobbers = .{ .eflags = true },
@@ -8517,6 +8945,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.{ .rc = .general_purpose }},
                             .clobbers = .{ .eflags = true },
@@ -8547,6 +8978,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.{ .rc = .general_purpose }},
                             .clobbers = .{ .eflags = true },
@@ -8585,6 +9019,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.{ .rc = .general_purpose }},
                             .clobbers = .{ .eflags = true },
@@ -8616,6 +9053,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.{ .rc = .general_purpose }},
                             .clobbers = .{ .eflags = true },
@@ -8646,6 +9086,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.{ .rc = .general_purpose }},
                             .clobbers = .{ .eflags = true },
@@ -8677,6 +9120,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.{ .rc = .general_purpose }},
                             .clobbers = .{ .eflags = true },
@@ -8708,6 +9154,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.{ .rc = .general_purpose }},
                             .clobbers = .{ .eflags = true },
@@ -8744,6 +9193,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
                                 .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
                             .dst_temps = .{.mem},
                             .clobbers = .{ .eflags = true },
@@ -8775,8 +9227,42 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         }, .{
                             .required_features = .{ .f16c, null, null, null },
                             .src_constraints = .{
-                                .{ .scalar_exact_float = .{ .of = .qword, .is = .word } },
-                                .{ .scalar_exact_float = .{ .of = .qword, .is = .word } },
+                                .{ .scalar_float = .{ .of = .word, .is = .word } },
+                                .{ .scalar_float = .{ .of = .word, .is = .word } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_sse, .to_sse } },
+                            },
+                            .extra_temps = .{
+                                .{ .kind = .{ .rc = .sse } },
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .mut_rc_mask = .{
+                                .ref = .src0,
+                                .rc = .sse,
+                                .info = .{ .kind = .all, .scalar = .dword },
+                            } }},
+                            .each = .{ .once = &.{
+                                .{ ._, .v_ps, .cvtph2, .dst0x, .src0x, ._, ._ },
+                                .{ ._, .v_ps, .cvtph2, .tmp0x, .src1x, ._, ._ },
+                                .{ ._, .v_ss, .cmp, .dst0x, .dst0x, .tmp0x, .ui(switch (cc) {
+                                    else => unreachable,
+                                    .e => 0b00000,
+                                    .ne => 0b00100,
+                                }) },
+                            } },
+                        }, .{
+                            .required_features = .{ .f16c, null, null, null },
+                            .src_constraints = .{
+                                .{ .scalar_float = .{ .of = .qword, .is = .word } },
+                                .{ .scalar_float = .{ .of = .qword, .is = .word } },
                             },
                             .patterns = &.{
                                 .{ .src = .{ .mem, .mem } },
@@ -8791,15 +9277,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .unused,
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
-                            .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
-                                .kind = .all,
-                                .scalar = .dword,
-                            } } }},
+                            .dst_temps = .{.{ .mut_rc_mask = .{
+                                .ref = .src0,
+                                .rc = .sse,
+                                .info = .{ .kind = .all, .scalar = .dword },
+                            } }},
                             .each = .{ .once = &.{
                                 .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ },
                                 .{ ._, .v_ps, .cvtph2, .tmp0x, .src1q, ._, ._ },
-                                .{ ._, .v_ps, .cmp, .dst0x, .dst0x, .tmp0x, .si(switch (cc) {
+                                .{ ._, .v_ps, .cmp, .dst0x, .dst0x, .tmp0x, .ui(switch (cc) {
                                     else => unreachable,
                                     .e => 0b00000,
                                     .ne => 0b00100,
@@ -8808,8 +9298,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         }, .{
                             .required_features = .{ .f16c, null, null, null },
                             .src_constraints = .{
-                                .{ .scalar_exact_float = .{ .of = .xword, .is = .word } },
-                                .{ .scalar_exact_float = .{ .of = .xword, .is = .word } },
+                                .{ .scalar_float = .{ .of = .xword, .is = .word } },
+                                .{ .scalar_float = .{ .of = .xword, .is = .word } },
                             },
                             .patterns = &.{
                                 .{ .src = .{ .mem, .mem } },
@@ -8824,44 +9314,2313 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .unused,
                                 .unused,
                                 .unused,
+                                .unused,
+                                .unused,
+                                .unused,
                             },
-                            .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{
-                                .kind = .all,
-                                .scalar = .dword,
-                            } } }},
+                            .dst_temps = .{.{ .mut_rc_mask = .{
+                                .ref = .src0,
+                                .rc = .sse,
+                                .info = .{ .kind = .all, .scalar = .dword },
+                            } }},
                             .each = .{ .once = &.{
                                 .{ ._, .v_ps, .cvtph2, .dst0y, .src0x, ._, ._ },
                                 .{ ._, .v_ps, .cvtph2, .tmp0y, .src1x, ._, ._ },
-                                .{ ._, .v_ps, .cmp, .dst0y, .dst0y, .tmp0y, .si(switch (cc) {
+                                .{ ._, .v_ps, .cmp, .dst0y, .dst0y, .tmp0y, .ui(switch (cc) {
                                     else => unreachable,
                                     .e => 0b00000,
                                     .ne => 0b00100,
                                 }) },
                             } },
-                        } },
-                    }) catch |err| switch (err) {
-                        error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
-                            @tagName(air_tag),
-                            cg.typeOf(extra.lhs).fmt(pt),
-                            ops[0].tracking(cg),
-                            ops[1].tracking(cg),
-                        }),
-                        else => |e| return e,
-                    },
-                    .gte => unreachable,
-                    .gt => unreachable,
-                }
-                try res[0].finish(inst, &.{ extra.lhs, extra.rhs }, &ops, cg);
-            },
-
-            .abs => |air_tag| if (use_old) try cg.airAbs(inst) else fallback: {
-                const ty_op = air_datas[@intFromEnum(inst)].ty_op;
-                if (ty_op.ty.toType().isVector(zcu) and ty_op.ty.toType().childType(zcu).isRuntimeFloat()) break :fallback try cg.airAbs(inst);
-                var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
-                var res: [1]Temp = undefined;
-                cg.select(&res, &.{ty_op.ty.toType()}, &ops, comptime &.{ .{
-                    .required_features = .{ .cmov, null, null, null },
-                    .src_constraints = .{ .{ .int = .byte }, .any },
+                        }, .{
+                            .required_features = .{ .avx, null, null, null },
+                            .src_constraints = .{
+                                .{ .scalar_float = .{ .of = .dword, .is = .dword } },
+                                .{ .scalar_float = .{ .of = .dword, .is = .dword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_sse, .mem } },
+                                .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_sse, .to_sse } },
+                            },
+                            .dst_temps = .{.{ .mut_rc_mask = .{
+                                .ref = .src0,
+                                .rc = .sse,
+                                .info = .{ .kind = .all, .scalar = .dword },
+                            } }},
+                            .each = .{ .once = &.{
+                                .{ ._, .v_ss, .cmp, .dst0x, .src0x, .src1x, .ui(switch (cc) {
+                                    else => unreachable,
+                                    .e => 0b00000,
+                                    .ne => 0b00100,
+                                }) },
+                            } },
+                        }, .{
+                            .required_features = .{ .sse, null, null, null },
+                            .src_constraints = .{
+                                .{ .scalar_float = .{ .of = .dword, .is = .dword } },
+                                .{ .scalar_float = .{ .of = .dword, .is = .dword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mut_sse, .mem } },
+                                .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_mut_sse, .to_sse } },
+                            },
+                            .dst_temps = .{.{ .ref_mask = .{
+                                .ref = .src0,
+                                .info = .{ .kind = .all, .scalar = .dword },
+                            } }},
+                            .each = .{ .once = &.{
+                                .{ ._, ._ss, .cmp, .dst0x, .src1x, .ui(switch (cc) {
+                                    else => unreachable,
+                                    .e => 0b00000,
+                                    .ne => 0b00100,
+                                }), ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .avx, null, null, null },
+                            .src_constraints = .{
+                                .{ .scalar_float = .{ .of = .xword, .is = .dword } },
+                                .{ .scalar_float = .{ .of = .xword, .is = .dword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_sse, .mem } },
+                                .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_sse, .to_sse } },
+                            },
+                            .dst_temps = .{.{ .mut_rc_mask = .{
+                                .ref = .src0,
+                                .rc = .sse,
+                                .info = .{ .kind = .all, .scalar = .dword },
+                            } }},
+                            .each = .{ .once = &.{
+                                .{ ._, .v_ps, .cmp, .dst0x, .src0x, .src1x, .ui(switch (cc) {
+                                    else => unreachable,
+                                    .e => 0b00000,
+                                    .ne => 0b00100,
+                                }) },
+                            } },
+                        }, .{
+                            .required_features = .{ .sse, null, null, null },
+                            .src_constraints = .{
+                                .{ .scalar_float = .{ .of = .xword, .is = .dword } },
+                                .{ .scalar_float = .{ .of = .xword, .is = .dword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mut_sse, .mem } },
+                                .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_mut_sse, .to_sse } },
+                            },
+                            .dst_temps = .{.{ .ref_mask = .{
+                                .ref = .src0,
+                                .info = .{ .kind = .all, .scalar = .dword },
+                            } }},
+                            .each = .{ .once = &.{
+                                .{ ._, ._ps, .cmp, .dst0x, .src1x, .ui(switch (cc) {
+                                    else => unreachable,
+                                    .e => 0b00000,
+                                    .ne => 0b00100,
+                                }), ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .avx, null, null, null },
+                            .src_constraints = .{
+                                .{ .scalar_float = .{ .of = .yword, .is = .dword } },
+                                .{ .scalar_float = .{ .of = .yword, .is = .dword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_sse, .mem } },
+                                .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_sse, .to_sse } },
+                            },
+                            .dst_temps = .{.{ .mut_rc_mask = .{
+                                .ref = .src0,
+                                .rc = .sse,
+                                .info = .{ .kind = .all, .scalar = .dword },
+                            } }},
+                            .each = .{ .once = &.{
+                                .{ ._, .v_ps, .cmp, .dst0y, .src0y, .src1y, .ui(switch (cc) {
+                                    else => unreachable,
+                                    .e => 0b00000,
+                                    .ne => 0b00100,
+                                }) },
+                            } },
+                        }, .{
+                            .required_features = .{ .avx, null, null, null },
+                            .src_constraints = .{
+                                .{ .scalar_float = .{ .of = .qword, .is = .qword } },
+                                .{ .scalar_float = .{ .of = .qword, .is = .qword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_sse, .mem } },
+                                .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_sse, .to_sse } },
+                            },
+                            .dst_temps = .{.{ .mut_rc_mask = .{
+                                .ref = .src0,
+                                .rc = .sse,
+                                .info = .{ .kind = .all, .scalar = .qword },
+                            } }},
+                            .each = .{ .once = &.{
+                                .{ ._, .v_sd, .cmp, .dst0x, .src0x, .src1x, .ui(switch (cc) {
+                                    else => unreachable,
+                                    .e => 0b00000,
+                                    .ne => 0b00100,
+                                }) },
+                            } },
+                        }, .{
+                            .required_features = .{ .sse2, null, null, null },
+                            .src_constraints = .{
+                                .{ .scalar_float = .{ .of = .qword, .is = .qword } },
+                                .{ .scalar_float = .{ .of = .qword, .is = .qword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mut_sse, .mem } },
+                                .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_mut_sse, .to_sse } },
+                            },
+                            .dst_temps = .{.{ .ref_mask = .{
+                                .ref = .src0,
+                                .info = .{ .kind = .all, .scalar = .qword },
+                            } }},
+                            .each = .{ .once = &.{
+                                .{ ._, ._sd, .cmp, .dst0x, .src1x, .ui(switch (cc) {
+                                    else => unreachable,
+                                    .e => 0b00000,
+                                    .ne => 0b00100,
+                                }), ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .avx, null, null, null },
+                            .src_constraints = .{
+                                .{ .scalar_float = .{ .of = .xword, .is = .qword } },
+                                .{ .scalar_float = .{ .of = .xword, .is = .qword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_sse, .mem } },
+                                .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_sse, .to_sse } },
+                            },
+                            .dst_temps = .{.{ .mut_rc_mask = .{
+                                .ref = .src0,
+                                .rc = .sse,
+                                .info = .{ .kind = .all, .scalar = .qword },
+                            } }},
+                            .each = .{ .once = &.{
+                                .{ ._, .v_pd, .cmp, .dst0x, .src0x, .src1x, .ui(switch (cc) {
+                                    else => unreachable,
+                                    .e => 0b00000,
+                                    .ne => 0b00100,
+                                }) },
+                            } },
+                        }, .{
+                            .required_features = .{ .sse2, null, null, null },
+                            .src_constraints = .{
+                                .{ .scalar_float = .{ .of = .xword, .is = .qword } },
+                                .{ .scalar_float = .{ .of = .xword, .is = .qword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mut_sse, .mem } },
+                                .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_mut_sse, .to_sse } },
+                            },
+                            .dst_temps = .{.{ .ref_mask = .{
+                                .ref = .src0,
+                                .info = .{ .kind = .all, .scalar = .qword },
+                            } }},
+                            .each = .{ .once = &.{
+                                .{ ._, ._pd, .cmp, .dst0x, .src1x, .ui(switch (cc) {
+                                    else => unreachable,
+                                    .e => 0b00000,
+                                    .ne => 0b00100,
+                                }), ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .avx, null, null, null },
+                            .src_constraints = .{
+                                .{ .scalar_float = .{ .of = .yword, .is = .qword } },
+                                .{ .scalar_float = .{ .of = .yword, .is = .qword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_sse, .mem } },
+                                .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                                .{ .src = .{ .to_sse, .to_sse } },
+                            },
+                            .dst_temps = .{.{ .mut_rc_mask = .{
+                                .ref = .src0,
+                                .rc = .sse,
+                                .info = .{ .kind = .all, .scalar = .qword },
+                            } }},
+                            .each = .{ .once = &.{
+                                .{ ._, .v_pd, .cmp, .dst0y, .src0y, .src1y, .ui(switch (cc) {
+                                    else => unreachable,
+                                    .e => 0b00000,
+                                    .ne => 0b00100,
+                                }) },
+                            } },
+                        }, .{
+                            .required_features = .{ .f16c, .slow_incdec, null, null },
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .word } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .word } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .vector_8_f16, .kind = .{ .rc = .sse } },
+                                .{ .type = .vector_8_f16, .kind = .{ .rc = .sse } },
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                                .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                                .{ .@"0:", .v_ps, .cvtph2, .tmp2y, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                                .{ ._, .v_ps, .cvtph2, .tmp3y, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                                .{ ._, .v_ps, .cmp, .tmp2y, .tmp2y, .tmp3y, .ui(switch (cc) {
+                                    else => unreachable,
+                                    .e => 0b00000,
+                                    .ne => 0b00100,
+                                }) },
+                                .{ ._, .v_ps, .movmsk, .tmp4d, .tmp2y, ._, ._ },
+                                .{ ._, ._, .mov, .lea(.byte, .tmp1), .tmp4b, ._, ._ },
+                                .{ ._, ._, .lea, .tmp1p, .lead(.none, .tmp1, 1), ._, ._ },
+                                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .f16c, null, null, null },
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .word } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .word } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .vector_8_f16, .kind = .{ .rc = .sse } },
+                                .{ .type = .vector_8_f16, .kind = .{ .rc = .sse } },
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                                .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                                .{ .@"0:", .v_ps, .cvtph2, .tmp2y, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                                .{ ._, .v_ps, .cvtph2, .tmp3y, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                                .{ ._, .v_ps, .cmp, .tmp2y, .tmp2y, .tmp3y, .ui(switch (cc) {
+                                    else => unreachable,
+                                    .e => 0b00000,
+                                    .ne => 0b00100,
+                                }) },
+                                .{ ._, .v_ps, .movmsk, .tmp4d, .tmp2y, ._, ._ },
+                                .{ ._, ._, .mov, .lea(.byte, .tmp1), .tmp4b, ._, ._ },
+                                .{ ._, ._c, .in, .tmp1p, ._, ._, ._ },
+                                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .avx, .slow_incdec, null, null },
+                            .dst_constraints = .{.{ .bool_vec = .dword }},
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .f16, .kind = .{ .reg = .xmm0 } },
+                                .{ .type = .f16, .kind = .{ .reg = .xmm1 } },
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
+                                    else => unreachable,
+                                    .e => "__eqhf2",
+                                    .ne => "__nehf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } },
+                                .{ .type = .u8, .kind = .{ .reg = .cl } },
+                                .{ .type = .u32, .kind = .{ .reg = .edx } },
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .rc = .general_purpose }},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+                                .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+                                .{ .@"0:", .vp_, .xor, .tmp2x, .tmp2x, .tmp2x, ._ },
+                                .{ ._, .vp_w, .insr, .tmp1x, .tmp2x, .memsi(.src0w, .@"2", .tmp0), .ui(0) },
+                                .{ ._, .vp_w, .insr, .tmp2x, .tmp2x, .memsi(.src1w, .@"2", .tmp0), .ui(0) },
+                                .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+                                .{ ._, ._, .xor, .tmp6d, .tmp6d, ._, ._ },
+                                .{ ._, ._, .@"test", .tmp4d, .tmp4d, ._, ._ },
+                                .{ ._, .fromCondition(cc), .set, .tmp6b, ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._l, .sh, .tmp6d, .tmp5b, ._, ._ },
+                                .{ ._, ._, .@"or", .dst0d, .tmp6d, ._, ._ },
+                                .{ ._, ._, .add, .tmp0d, .si(1), ._, ._ },
+                                .{ ._, ._, .cmp, .tmp0d, .sa(.src0, .add_len), ._, ._ },
+                                .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .avx, null, null, null },
+                            .dst_constraints = .{.{ .bool_vec = .dword }},
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .f16, .kind = .{ .reg = .xmm0 } },
+                                .{ .type = .f16, .kind = .{ .reg = .xmm1 } },
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
+                                    else => unreachable,
+                                    .e => "__eqhf2",
+                                    .ne => "__nehf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } },
+                                .{ .type = .u8, .kind = .{ .reg = .cl } },
+                                .{ .type = .u32, .kind = .{ .reg = .edx } },
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .rc = .general_purpose }},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+                                .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+                                .{ .@"0:", .vp_, .xor, .tmp2x, .tmp2x, .tmp2x, ._ },
+                                .{ ._, .vp_w, .insr, .tmp1x, .tmp2x, .memsi(.src0w, .@"2", .tmp0), .ui(0) },
+                                .{ ._, .vp_w, .insr, .tmp2x, .tmp2x, .memsi(.src1w, .@"2", .tmp0), .ui(0) },
+                                .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+                                .{ ._, ._, .xor, .tmp6d, .tmp6d, ._, ._ },
+                                .{ ._, ._, .@"test", .tmp4d, .tmp4d, ._, ._ },
+                                .{ ._, .fromCondition(cc), .set, .tmp6b, ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._l, .sh, .tmp6d, .tmp5b, ._, ._ },
+                                .{ ._, ._, .@"or", .dst0d, .tmp6d, ._, ._ },
+                                .{ ._, ._c, .in, .tmp0d, ._, ._, ._ },
+                                .{ ._, ._, .cmp, .tmp0d, .sa(.src0, .add_len), ._, ._ },
+                                .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .sse2, .slow_incdec, null, null },
+                            .dst_constraints = .{.{ .bool_vec = .dword }},
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .f16, .kind = .{ .reg = .xmm0 } },
+                                .{ .type = .f16, .kind = .{ .reg = .xmm1 } },
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
+                                    else => unreachable,
+                                    .e => "__eqhf2",
+                                    .ne => "__nehf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } },
+                                .{ .type = .u8, .kind = .{ .reg = .cl } },
+                                .{ .type = .u32, .kind = .{ .reg = .edx } },
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .rc = .general_purpose }},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+                                .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+                                .{ .@"0:", .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
+                                .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ },
+                                .{ ._, .p_w, .insr, .tmp1x, .memsi(.src0w, .@"2", .tmp0), .ui(0), ._ },
+                                .{ ._, .p_w, .insr, .tmp2x, .memsi(.src1w, .@"2", .tmp0), .ui(0), ._ },
+                                .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+                                .{ ._, ._, .xor, .tmp6d, .tmp6d, ._, ._ },
+                                .{ ._, ._, .@"test", .tmp4d, .tmp4d, ._, ._ },
+                                .{ ._, .fromCondition(cc), .set, .tmp6b, ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._l, .sh, .tmp6d, .tmp5b, ._, ._ },
+                                .{ ._, ._, .@"or", .dst0d, .tmp6d, ._, ._ },
+                                .{ ._, ._, .add, .tmp0d, .si(1), ._, ._ },
+                                .{ ._, ._, .cmp, .tmp0d, .sa(.src0, .add_len), ._, ._ },
+                                .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .sse2, null, null, null },
+                            .dst_constraints = .{.{ .bool_vec = .dword }},
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .f16, .kind = .{ .reg = .xmm0 } },
+                                .{ .type = .f16, .kind = .{ .reg = .xmm1 } },
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
+                                    else => unreachable,
+                                    .e => "__eqhf2",
+                                    .ne => "__nehf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } },
+                                .{ .type = .u8, .kind = .{ .reg = .cl } },
+                                .{ .type = .u32, .kind = .{ .reg = .edx } },
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .rc = .general_purpose }},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+                                .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+                                .{ .@"0:", .p_, .xor, .tmp1x, .tmp1x, ._, ._ },
+                                .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ },
+                                .{ ._, .p_w, .insr, .tmp1x, .memsi(.src0w, .@"2", .tmp0), .ui(0), ._ },
+                                .{ ._, .p_w, .insr, .tmp2x, .memsi(.src1w, .@"2", .tmp0), .ui(0), ._ },
+                                .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+                                .{ ._, ._, .xor, .tmp6d, .tmp6d, ._, ._ },
+                                .{ ._, ._, .@"test", .tmp4d, .tmp4d, ._, ._ },
+                                .{ ._, .fromCondition(cc), .set, .tmp6b, ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._l, .sh, .tmp6d, .tmp5b, ._, ._ },
+                                .{ ._, ._, .@"or", .dst0d, .tmp6d, ._, ._ },
+                                .{ ._, ._c, .in, .tmp0d, ._, ._, ._ },
+                                .{ ._, ._, .cmp, .tmp0d, .sa(.src0, .add_len), ._, ._ },
+                                .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .sse, .slow_incdec, null, null },
+                            .dst_constraints = .{.{ .bool_vec = .dword }},
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .f16, .kind = .{ .reg = .xmm0 } },
+                                .{ .type = .f16, .kind = .{ .reg = .xmm1 } },
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
+                                    else => unreachable,
+                                    .e => "__eqhf2",
+                                    .ne => "__nehf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } },
+                                .{ .type = .u8, .kind = .{ .reg = .cl } },
+                                .{ .type = .u32, .kind = .{ .reg = .edx } },
+                                .{ .type = .vector_8_f16, .kind = .mem },
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .rc = .general_purpose }},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+                                .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+                                .{ .@"0:", ._, .movzx, .tmp4d, .memsi(.src0w, .@"2", .tmp0), ._, ._ },
+                                .{ ._, ._, .mov, .mem(.tmp7d), .tmp4d, ._, ._ },
+                                .{ ._, ._ps, .mova, .tmp1x, .mem(.tmp7x), ._, ._ },
+                                .{ ._, ._, .movzx, .tmp4d, .memsi(.src1w, .@"2", .tmp0), ._, ._ },
+                                .{ ._, ._, .mov, .mem(.tmp7d), .tmp4d, ._, ._ },
+                                .{ ._, ._ps, .mova, .tmp2x, .mem(.tmp7x), ._, ._ },
+                                .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+                                .{ ._, ._, .xor, .tmp6d, .tmp6d, ._, ._ },
+                                .{ ._, ._, .@"test", .tmp4d, .tmp4d, ._, ._ },
+                                .{ ._, .fromCondition(cc), .set, .tmp6b, ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._l, .sh, .tmp6d, .tmp5b, ._, ._ },
+                                .{ ._, ._, .@"or", .dst0d, .tmp6d, ._, ._ },
+                                .{ ._, ._, .add, .tmp0d, .si(1), ._, ._ },
+                                .{ ._, ._, .cmp, .tmp0d, .sa(.src0, .add_len), ._, ._ },
+                                .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .sse, null, null, null },
+                            .dst_constraints = .{.{ .bool_vec = .dword }},
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .f16, .kind = .{ .reg = .xmm0 } },
+                                .{ .type = .f16, .kind = .{ .reg = .xmm1 } },
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
+                                    else => unreachable,
+                                    .e => "__eqhf2",
+                                    .ne => "__nehf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } },
+                                .{ .type = .u8, .kind = .{ .reg = .cl } },
+                                .{ .type = .u32, .kind = .{ .reg = .edx } },
+                                .{ .type = .vector_8_f16, .kind = .mem },
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .rc = .general_purpose }},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ },
+                                .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+                                .{ .@"0:", ._, .movzx, .tmp4d, .memsi(.src0w, .@"2", .tmp0), ._, ._ },
+                                .{ ._, ._, .mov, .mem(.tmp7d), .tmp4d, ._, ._ },
+                                .{ ._, ._ps, .mova, .tmp1x, .mem(.tmp7x), ._, ._ },
+                                .{ ._, ._, .movzx, .tmp4d, .memsi(.src1w, .@"2", .tmp0), ._, ._ },
+                                .{ ._, ._, .mov, .mem(.tmp7d), .tmp4d, ._, ._ },
+                                .{ ._, ._ps, .mova, .tmp2x, .mem(.tmp7x), ._, ._ },
+                                .{ ._, ._, .call, .tmp3d, ._, ._, ._ },
+                                .{ ._, ._, .xor, .tmp6d, .tmp6d, ._, ._ },
+                                .{ ._, ._, .@"test", .tmp4d, .tmp4d, ._, ._ },
+                                .{ ._, .fromCondition(cc), .set, .tmp6b, ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._l, .sh, .tmp6d, .tmp5b, ._, ._ },
+                                .{ ._, ._, .@"or", .dst0d, .tmp6d, ._, ._ },
+                                .{ ._, ._c, .in, .tmp0d, ._, ._, ._ },
+                                .{ ._, ._, .cmp, .tmp0d, .sa(.src0, .add_len), ._, ._ },
+                                .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .@"64bit", .avx, .slow_incdec, null },
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .f16, .kind = .{ .reg = .xmm0 } },
+                                .{ .type = .f16, .kind = .{ .reg = .xmm1 } },
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
+                                    else => unreachable,
+                                    .e => "__eqhf2",
+                                    .ne => "__nehf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } },
+                                .{ .type = .u8, .kind = .{ .reg = .cl } },
+                                .{ .type = .u64, .kind = .{ .reg = .rdx } },
+                                .unused,
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ },
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+                                .{ .@"0:", .vp_, .xor, .tmp3x, .tmp3x, .tmp3x, ._ },
+                                .{ ._, .vp_w, .insr, .tmp2x, .tmp3x, .memsi(.src0w, .@"2", .tmp0), .ui(0) },
+                                .{ ._, .vp_w, .insr, .tmp3x, .tmp3x, .memsi(.src1w, .@"2", .tmp0), .ui(0) },
+                                .{ ._, ._, .call, .tmp4d, ._, ._, ._ },
+                                .{ ._, ._, .xor, .tmp7d, .tmp7d, ._, ._ },
+                                .{ ._, ._, .@"test", .tmp5d, .tmp5d, ._, ._ },
+                                .{ ._, .fromCondition(cc), .set, .tmp7b, ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp6d, .tmp0d, ._, ._ },
+                                .{ ._, ._l, .sh, .tmp7q, .tmp6b, ._, ._ },
+                                .{ ._, ._, .@"or", .tmp1q, .tmp7q, ._, ._ },
+                                .{ ._, ._, .lea, .tmp0d, .lead(.none, .tmp0, 1), ._, ._ }, // advance the element index by one
+                                .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ }, // low six bits clear => a full 64-bit mask word was just completed
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp5d, .ui(3), ._, ._ }, // index / 8 = byte offset just past the completed word
+                                .{ ._, ._, .mov, .memid(.dst0q, .tmp5, -8), .tmp1q, ._, ._ }, // store the completed word (displacement -8 steps back to its start)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // restart the accumulator for the next word
+                                .{ .@"1:", ._, .cmp, .tmp0d, .sa(.src0, .add_len), ._, ._ }, // NOTE(review): presumably compares against the element count of src0
+                                .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
+                                .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ }, // a final index that is a multiple of 64 leaves no partial word to flush
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp5d, .ui(6), ._, ._ }, // index / 64 = qword index of the partial word; the store below scales it by 8
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp5), .tmp1q, ._, ._ },
+                            } },
+                        }, .{
+                            .required_features = .{ .@"64bit", .avx, null, null }, // f16 vector ==/!=: one helper call per element, result packed one bit per element
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp0: element index
+                                .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp1: result bits of the current 64-element group
+                                .{ .type = .f16, .kind = .{ .reg = .xmm0 } }, // tmp2: lhs element, pinned to xmm0
+                                .{ .type = .f16, .kind = .{ .reg = .xmm1 } }, // tmp3: rhs element, pinned to xmm1
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) { // tmp4: comparison helper selected by cc
+                                    else => unreachable,
+                                    .e => "__eqhf2",
+                                    .ne => "__nehf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } }, // tmp5: helper result, reused as address scratch
+                                .{ .type = .u8, .kind = .{ .reg = .cl } }, // tmp6: variable shift count
+                                .{ .type = .u64, .kind = .{ .reg = .rdx } }, // tmp7: the single result bit being positioned
+                                .unused,
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, // index = 0
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // accumulated bits = 0
+                                .{ .@"0:", .vp_, .xor, .tmp3x, .tmp3x, .tmp3x, ._ }, // loop head: zero xmm1 so the inserts below start from zero
+                                .{ ._, .vp_w, .insr, .tmp2x, .tmp3x, .memsi(.src0w, .@"2", .tmp0), .ui(0) }, // xmm0 = zeros with lhs word (index scaled by 2) in lane 0
+                                .{ ._, .vp_w, .insr, .tmp3x, .tmp3x, .memsi(.src1w, .@"2", .tmp0), .ui(0) }, // xmm1 = zeros with rhs word in lane 0
+                                .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, // call the helper
+                                .{ ._, ._, .xor, .tmp7d, .tmp7d, ._, ._ }, // clear rdx before setcc writes its low byte
+                                .{ ._, ._, .@"test", .tmp5d, .tmp5d, ._, ._ }, // set flags from the helper's eax result
+                                .{ ._, .fromCondition(cc), .set, .tmp7b, ._, ._, ._ }, // dl = 1 when cc holds for that result
+                                .{ ._, ._, .mov, .tmp6d, .tmp0d, ._, ._ }, // shift count = index; a 64-bit shift only uses the low 6 bits of cl
+                                .{ ._, ._l, .sh, .tmp7q, .tmp6b, ._, ._ }, // move the bit to position index % 64
+                                .{ ._, ._, .@"or", .tmp1q, .tmp7q, ._, ._ }, // merge into the current group
+                                .{ ._, ._c, .in, .tmp0d, ._, ._, ._ }, // index += 1
+                                .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ }, // did we just finish a group of 64?
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // no: skip the flush
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp5d, .ui(3), ._, ._ }, // index / 8 = byte offset just past the finished group
+                                .{ ._, ._, .mov, .memid(.dst0q, .tmp5, -8), .tmp1q, ._, ._ }, // store the finished qword (displacement -8 from that offset)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // start a new group
+                                .{ .@"1:", ._, .cmp, .tmp0d, .sa(.src0, .add_len), ._, ._ }, // compare index with the `.add_len` operand (presumably the element count)
+                                .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, // loop while index is below it
+                                .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ }, // any bits left in a partial group?
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // none: skip the final store (forward label 0, presumably the end of the sequence)
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp5d, .ui(6), ._, ._ }, // index / 64 = qword index of the partial group (was 9, which hit the wrong qword past 64 elements)
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp5), .tmp1q, ._, ._ }, // store the partial group at qword index tmp5 (scale 8)
+                            } },
+                        }, .{
+                            .required_features = .{ .@"64bit", .sse2, .slow_incdec, null }, // f16 vector ==/!=: one helper call per element; `lea` performs the +1 step
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp0: element index
+                                .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp1: result bits of the current 64-element group
+                                .{ .type = .f16, .kind = .{ .reg = .xmm0 } }, // tmp2: lhs element, pinned to xmm0
+                                .{ .type = .f16, .kind = .{ .reg = .xmm1 } }, // tmp3: rhs element, pinned to xmm1
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) { // tmp4: comparison helper selected by cc
+                                    else => unreachable,
+                                    .e => "__eqhf2",
+                                    .ne => "__nehf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } }, // tmp5: helper result, reused as address scratch
+                                .{ .type = .u8, .kind = .{ .reg = .cl } }, // tmp6: variable shift count
+                                .{ .type = .u64, .kind = .{ .reg = .rdx } }, // tmp7: the single result bit being positioned
+                                .unused,
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, // index = 0
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // accumulated bits = 0
+                                .{ .@"0:", .p_, .xor, .tmp2x, .tmp2x, ._, ._ }, // loop head: zero xmm0
+                                .{ ._, .p_w, .insr, .tmp2x, .memsi(.src0w, .@"2", .tmp0), .ui(0), ._ }, // insert lhs word (index scaled by 2) into lane 0
+                                .{ ._, .p_, .xor, .tmp3x, .tmp3x, ._, ._ }, // zero xmm1
+                                .{ ._, .p_w, .insr, .tmp3x, .memsi(.src1w, .@"2", .tmp0), .ui(0), ._ }, // insert rhs word into lane 0
+                                .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, // call the helper
+                                .{ ._, ._, .xor, .tmp7d, .tmp7d, ._, ._ }, // clear rdx before setcc writes its low byte
+                                .{ ._, ._, .@"test", .tmp5d, .tmp5d, ._, ._ }, // set flags from the helper's eax result
+                                .{ ._, .fromCondition(cc), .set, .tmp7b, ._, ._, ._ }, // dl = 1 when cc holds for that result
+                                .{ ._, ._, .mov, .tmp6d, .tmp0d, ._, ._ }, // shift count = index; a 64-bit shift only uses the low 6 bits of cl
+                                .{ ._, ._l, .sh, .tmp7q, .tmp6b, ._, ._ }, // move the bit to position index % 64
+                                .{ ._, ._, .@"or", .tmp1q, .tmp7q, ._, ._ }, // merge into the current group
+                                .{ ._, ._, .lea, .tmp0d, .lead(.none, .tmp0, 1), ._, ._ }, // index += 1 via lea
+                                .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ }, // did we just finish a group of 64?
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // no: skip the flush
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp5d, .ui(3), ._, ._ }, // index / 8 = byte offset just past the finished group
+                                .{ ._, ._, .mov, .memid(.dst0q, .tmp5, -8), .tmp1q, ._, ._ }, // store the finished qword (displacement -8 from that offset)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // start a new group
+                                .{ .@"1:", ._, .cmp, .tmp0d, .sa(.src0, .add_len), ._, ._ }, // compare index with the `.add_len` operand (presumably the element count)
+                                .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, // loop while index is below it
+                                .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ }, // any bits left in a partial group?
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // none: skip the final store (forward label 0, presumably the end of the sequence)
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp5d, .ui(6), ._, ._ }, // index / 64 = qword index of the partial group (was 9, which hit the wrong qword past 64 elements)
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp5), .tmp1q, ._, ._ }, // store the partial group at qword index tmp5 (scale 8)
+                            } },
+                        }, .{
+                            .required_features = .{ .@"64bit", .sse2, null, null }, // f16 vector ==/!=: one helper call per element; this variant steps with `inc`
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp0: element index
+                                .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp1: result bits of the current 64-element group
+                                .{ .type = .f16, .kind = .{ .reg = .xmm0 } }, // tmp2: lhs element, pinned to xmm0
+                                .{ .type = .f16, .kind = .{ .reg = .xmm1 } }, // tmp3: rhs element, pinned to xmm1
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) { // tmp4: comparison helper selected by cc
+                                    else => unreachable,
+                                    .e => "__eqhf2",
+                                    .ne => "__nehf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } }, // tmp5: helper result, reused as address scratch
+                                .{ .type = .u8, .kind = .{ .reg = .cl } }, // tmp6: variable shift count
+                                .{ .type = .u64, .kind = .{ .reg = .rdx } }, // tmp7: the single result bit being positioned
+                                .unused,
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, // index = 0
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // accumulated bits = 0
+                                .{ .@"0:", .p_, .xor, .tmp2x, .tmp2x, ._, ._ }, // loop head: zero xmm0
+                                .{ ._, .p_w, .insr, .tmp2x, .memsi(.src0w, .@"2", .tmp0), .ui(0), ._ }, // insert lhs word (index scaled by 2) into lane 0
+                                .{ ._, .p_, .xor, .tmp3x, .tmp3x, ._, ._ }, // zero xmm1
+                                .{ ._, .p_w, .insr, .tmp3x, .memsi(.src1w, .@"2", .tmp0), .ui(0), ._ }, // insert rhs word into lane 0
+                                .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, // call the helper
+                                .{ ._, ._, .xor, .tmp7d, .tmp7d, ._, ._ }, // clear rdx before setcc writes its low byte
+                                .{ ._, ._, .@"test", .tmp5d, .tmp5d, ._, ._ }, // set flags from the helper's eax result
+                                .{ ._, .fromCondition(cc), .set, .tmp7b, ._, ._, ._ }, // dl = 1 when cc holds for that result
+                                .{ ._, ._, .mov, .tmp6d, .tmp0d, ._, ._ }, // shift count = index; a 64-bit shift only uses the low 6 bits of cl
+                                .{ ._, ._l, .sh, .tmp7q, .tmp6b, ._, ._ }, // move the bit to position index % 64
+                                .{ ._, ._, .@"or", .tmp1q, .tmp7q, ._, ._ }, // merge into the current group
+                                .{ ._, ._c, .in, .tmp0d, ._, ._, ._ }, // index += 1
+                                .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ }, // did we just finish a group of 64?
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // no: skip the flush
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp5d, .ui(3), ._, ._ }, // index / 8 = byte offset just past the finished group
+                                .{ ._, ._, .mov, .memid(.dst0q, .tmp5, -8), .tmp1q, ._, ._ }, // store the finished qword (displacement -8 from that offset)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // start a new group
+                                .{ .@"1:", ._, .cmp, .tmp0d, .sa(.src0, .add_len), ._, ._ }, // compare index with the `.add_len` operand (presumably the element count)
+                                .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, // loop while index is below it
+                                .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ }, // any bits left in a partial group?
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // none: skip the final store (forward label 0, presumably the end of the sequence)
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp5d, .ui(6), ._, ._ }, // index / 64 = qword index of the partial group (was 9, which hit the wrong qword past 64 elements)
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp5), .tmp1q, ._, ._ }, // store the partial group at qword index tmp5 (scale 8)
+                            } },
+                        }, .{
+                            .required_features = .{ .@"64bit", .sse, .slow_incdec, null }, // f16 vector ==/!=: one helper call per element, elements staged through memory; `lea` performs the +1 step
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp0: element index
+                                .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp1: result bits of the current 64-element group
+                                .{ .type = .f16, .kind = .{ .reg = .xmm0 } }, // tmp2: lhs element, pinned to xmm0
+                                .{ .type = .f16, .kind = .{ .reg = .xmm1 } }, // tmp3: rhs element, pinned to xmm1
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) { // tmp4: comparison helper selected by cc
+                                    else => unreachable,
+                                    .e => "__eqhf2",
+                                    .ne => "__nehf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } }, // tmp5: element staging before the call, helper result after it, then address scratch
+                                .{ .type = .u8, .kind = .{ .reg = .cl } }, // tmp6: variable shift count
+                                .{ .type = .u64, .kind = .{ .reg = .rdx } }, // tmp7: the single result bit being positioned
+                                .{ .type = .vector_8_f16, .kind = .mem }, // tmp8: 16-byte memory scratch used to get a word into an xmm register
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, // index = 0
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // accumulated bits = 0
+                                .{ .@"0:", ._, .movzx, .tmp5d, .memsi(.src0w, .@"2", .tmp0), ._, ._ }, // loop head: zero-extend the lhs word (index scaled by 2)
+                                .{ ._, ._, .mov, .mem(.tmp8d), .tmp5d, ._, ._ }, // write it to the low dword of the scratch
+                                .{ ._, ._ps, .mova, .tmp2x, .mem(.tmp8x), ._, ._ }, // xmm0 = scratch; only the low dword was written here
+                                .{ ._, ._, .movzx, .tmp5d, .memsi(.src1w, .@"2", .tmp0), ._, ._ }, // same for the rhs word
+                                .{ ._, ._, .mov, .mem(.tmp8d), .tmp5d, ._, ._ },
+                                .{ ._, ._ps, .mova, .tmp3x, .mem(.tmp8x), ._, ._ }, // xmm1 = scratch
+                                .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, // call the helper
+                                .{ ._, ._, .xor, .tmp7d, .tmp7d, ._, ._ }, // clear rdx before setcc writes its low byte
+                                .{ ._, ._, .@"test", .tmp5d, .tmp5d, ._, ._ }, // set flags from the helper's eax result
+                                .{ ._, .fromCondition(cc), .set, .tmp7b, ._, ._, ._ }, // dl = 1 when cc holds for that result
+                                .{ ._, ._, .mov, .tmp6d, .tmp0d, ._, ._ }, // shift count = index; a 64-bit shift only uses the low 6 bits of cl
+                                .{ ._, ._l, .sh, .tmp7q, .tmp6b, ._, ._ }, // move the bit to position index % 64
+                                .{ ._, ._, .@"or", .tmp1q, .tmp7q, ._, ._ }, // merge into the current group
+                                .{ ._, ._, .lea, .tmp0d, .lead(.none, .tmp0, 1), ._, ._ }, // index += 1 via lea
+                                .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ }, // did we just finish a group of 64?
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // no: skip the flush
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp5d, .ui(3), ._, ._ }, // index / 8 = byte offset just past the finished group
+                                .{ ._, ._, .mov, .memid(.dst0q, .tmp5, -8), .tmp1q, ._, ._ }, // store the finished qword (displacement -8 from that offset)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // start a new group
+                                .{ .@"1:", ._, .cmp, .tmp0d, .sa(.src0, .add_len), ._, ._ }, // compare index with the `.add_len` operand (presumably the element count)
+                                .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, // loop while index is below it
+                                .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ }, // any bits left in a partial group?
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // none: skip the final store (forward label 0, presumably the end of the sequence)
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp5d, .ui(6), ._, ._ }, // index / 64 = qword index of the partial group (was 9, which hit the wrong qword past 64 elements)
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp5), .tmp1q, ._, ._ }, // store the partial group at qword index tmp5 (scale 8)
+                            } },
+                        }, .{
+                            .required_features = .{ .@"64bit", .sse, null, null }, // f16 vector ==/!=: one helper call per element, elements staged through memory; steps with `inc`
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                                .{ .multiple_scalar_float = .{ .of = .word, .is = .word } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp0: element index
+                                .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp1: result bits of the current 64-element group
+                                .{ .type = .f16, .kind = .{ .reg = .xmm0 } }, // tmp2: lhs element, pinned to xmm0
+                                .{ .type = .f16, .kind = .{ .reg = .xmm1 } }, // tmp3: rhs element, pinned to xmm1
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) { // tmp4: comparison helper selected by cc
+                                    else => unreachable,
+                                    .e => "__eqhf2",
+                                    .ne => "__nehf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } }, // tmp5: element staging before the call, helper result after it, then address scratch
+                                .{ .type = .u8, .kind = .{ .reg = .cl } }, // tmp6: variable shift count
+                                .{ .type = .u64, .kind = .{ .reg = .rdx } }, // tmp7: the single result bit being positioned
+                                .{ .type = .vector_8_f16, .kind = .mem }, // tmp8: 16-byte memory scratch used to get a word into an xmm register
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, // index = 0
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // accumulated bits = 0
+                                .{ .@"0:", ._, .movzx, .tmp5d, .memsi(.src0w, .@"2", .tmp0), ._, ._ }, // loop head: zero-extend the lhs word (index scaled by 2)
+                                .{ ._, ._, .mov, .mem(.tmp8d), .tmp5d, ._, ._ }, // write it to the low dword of the scratch
+                                .{ ._, ._ps, .mova, .tmp2x, .mem(.tmp8x), ._, ._ }, // xmm0 = scratch; only the low dword was written here
+                                .{ ._, ._, .movzx, .tmp5d, .memsi(.src1w, .@"2", .tmp0), ._, ._ }, // same for the rhs word
+                                .{ ._, ._, .mov, .mem(.tmp8d), .tmp5d, ._, ._ },
+                                .{ ._, ._ps, .mova, .tmp3x, .mem(.tmp8x), ._, ._ }, // xmm1 = scratch
+                                .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, // call the helper
+                                .{ ._, ._, .xor, .tmp7d, .tmp7d, ._, ._ }, // clear rdx before setcc writes its low byte
+                                .{ ._, ._, .@"test", .tmp5d, .tmp5d, ._, ._ }, // set flags from the helper's eax result
+                                .{ ._, .fromCondition(cc), .set, .tmp7b, ._, ._, ._ }, // dl = 1 when cc holds for that result
+                                .{ ._, ._, .mov, .tmp6d, .tmp0d, ._, ._ }, // shift count = index; a 64-bit shift only uses the low 6 bits of cl
+                                .{ ._, ._l, .sh, .tmp7q, .tmp6b, ._, ._ }, // move the bit to position index % 64
+                                .{ ._, ._, .@"or", .tmp1q, .tmp7q, ._, ._ }, // merge into the current group
+                                .{ ._, ._c, .in, .tmp0d, ._, ._, ._ }, // index += 1
+                                .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ }, // did we just finish a group of 64?
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // no: skip the flush
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp5d, .ui(3), ._, ._ }, // index / 8 = byte offset just past the finished group
+                                .{ ._, ._, .mov, .memid(.dst0q, .tmp5, -8), .tmp1q, ._, ._ }, // store the finished qword (displacement -8 from that offset)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // start a new group
+                                .{ .@"1:", ._, .cmp, .tmp0d, .sa(.src0, .add_len), ._, ._ }, // compare index with the `.add_len` operand (presumably the element count)
+                                .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, // loop while index is below it
+                                .{ ._, ._, .@"test", .tmp0d, .si(0b111111), ._, ._ }, // any bits left in a partial group?
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // none: skip the final store (forward label 0, presumably the end of the sequence)
+                                .{ ._, ._, .mov, .tmp5d, .tmp0d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp5d, .ui(6), ._, ._ }, // index / 64 = qword index of the partial group (was 9, which hit the wrong qword past 64 elements)
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp5), .tmp1q, ._, ._ }, // store the partial group at qword index tmp5 (scale 8)
+                            } },
+                        }, .{
+                            .required_features = .{ .avx, .slow_incdec, null, null }, // f32 vector ==/!= in 32-byte chunks, one result byte per chunk; `lea` performs the pointer +1 step
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } },
+                                .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running source offset, stepped by 32 per iteration
+                                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp1: destination write pointer
+                                .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, // tmp2: the eight lanes being compared
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp3: eight-bit lane mask
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = the `.sub_size` operand of src0 (presumably the negated byte size - confirm)
+                                .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ }, // tmp1 = address of the destination
+                                .{ .@"0:", .v_ps, .mova, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, // loop head: load 32 bytes of lhs at the current offset
+                                .{ ._, .v_ps, .cmp, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), .ui(switch (cc) { // lane-wise compare against rhs; the immediate is the predicate
+                                    else => unreachable,
+                                    .e => 0b00000, // predicate 0: equal
+                                    .ne => 0b00100, // predicate 4: not equal
+                                }) },
+                                .{ ._, .v_ps, .movmsk, .tmp3d, .tmp2y, ._, ._ }, // collect the sign bit of each of the 8 lanes
+                                .{ ._, ._, .mov, .lea(.byte, .tmp1), .tmp3b, ._, ._ }, // write that byte through tmp1
+                                .{ ._, ._, .lea, .tmp1p, .lead(.none, .tmp1, 1), ._, ._ }, // tmp1 += 1 via lea
+                                .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, // advance the offset by one chunk
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat until the add carries
+                            } },
+                        }, .{
+                            .required_features = .{ .avx, null, null, null }, // f32 vector ==/!= in 32-byte chunks, one result byte per chunk; this variant steps the pointer with `inc`
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } },
+                                .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running source offset, stepped by 32 per iteration
+                                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp1: destination write pointer
+                                .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, // tmp2: the eight lanes being compared
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp3: eight-bit lane mask
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = the `.sub_size` operand of src0 (presumably the negated byte size - confirm)
+                                .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ }, // tmp1 = address of the destination
+                                .{ .@"0:", .v_ps, .mova, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, // loop head: load 32 bytes of lhs at the current offset
+                                .{ ._, .v_ps, .cmp, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), .ui(switch (cc) { // lane-wise compare against rhs; the immediate is the predicate
+                                    else => unreachable,
+                                    .e => 0b00000, // predicate 0: equal
+                                    .ne => 0b00100, // predicate 4: not equal
+                                }) },
+                                .{ ._, .v_ps, .movmsk, .tmp3d, .tmp2y, ._, ._ }, // collect the sign bit of each of the 8 lanes
+                                .{ ._, ._, .mov, .lea(.byte, .tmp1), .tmp3b, ._, ._ }, // write that byte through tmp1
+                                .{ ._, ._c, .in, .tmp1p, ._, ._, ._ }, // tmp1 += 1; pointer-sized like every other use of tmp1 (was `.tmp1q`, although the entry does not require `.@"64bit"`)
+                                .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, // advance the offset by one chunk
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat until the add carries
+                            } },
+                        }, .{
+                            .required_features = .{ .sse2, null, null, null }, // f32 vector ==/!= in 32-byte chunks: two 16-byte compares are packed down into one result byte
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } },
+                                .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running source offset, stepped by 32 per iteration
+                                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp1: destination write pointer
+                                .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, // tmp2: first 16 bytes of the chunk, later the packed mask
+                                .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, // tmp3: second 16 bytes of the chunk
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp4: extracted mask
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = the `.sub_size` operand of src0 (presumably the negated byte size - confirm)
+                                .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ }, // tmp1 = address of the destination
+                                .{ .@"0:", ._ps, .mova, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // loop head: load the first 16 bytes of lhs
+                                .{ ._, ._ps, .mova, .tmp3x, .memiad(.src0x, .tmp0, .add_size, 16), ._, ._ }, // load the next 16 bytes of lhs (displacement 16)
+                                .{ ._, ._ps, .cmp, .tmp2x, .memia(.src1x, .tmp0, .add_size), .ui(switch (cc) { // compare the first half against rhs; the immediate is the predicate
+                                    else => unreachable,
+                                    .e => 0b00000, // predicate 0: equal
+                                    .ne => 0b00100, // predicate 4: not equal
+                                }), ._ },
+                                .{ ._, ._ps, .cmp, .tmp3x, .memiad(.src1x, .tmp0, .add_size, 16), .ui(switch (cc) { // compare the second half the same way
+                                    else => unreachable,
+                                    .e => 0b00000, // predicate 0: equal
+                                    .ne => 0b00100, // predicate 4: not equal
+                                }), ._ },
+                                .{ ._, .p_w, .ackssd, .tmp2x, .tmp3x, ._, ._ }, // pack both dword masks into eight words in tmp2
+                                .{ ._, .p_b, .ackssw, .tmp2x, .tmp2x, ._, ._ }, // pack those words into bytes
+                                .{ ._, .p_b, .movmsk, .tmp4d, .tmp2x, ._, ._ }, // collect the byte sign bits
+                                .{ ._, ._, .mov, .lea(.byte, .tmp1), .tmp4b, ._, ._ }, // write the low 8 bits through tmp1
+                                .{ ._, ._, .lea, .tmp1p, .lead(.none, .tmp1, 1), ._, ._ }, // tmp1 += 1 via lea
+                                .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, // advance the offset by one chunk
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat until the add carries
+                            } },
+                        }, .{
+                            .required_features = .{ .sse, null, null, null }, // f32 vector ==/!= in 32-byte chunks: two 4-bit masks are merged in general-purpose registers
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } },
+                                .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running source offset, stepped by 32 per iteration
+                                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp1: destination write pointer
+                                .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, // tmp2: the four lanes being compared (used for both halves)
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp3: mask of the first half, then the merged byte
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp4: mask of the second half
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = the `.sub_size` operand of src0 (presumably the negated byte size - confirm)
+                                .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ }, // tmp1 = address of the destination
+                                .{ .@"0:", ._ps, .mova, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // loop head: load the first 16 bytes of lhs
+                                .{ ._, ._ps, .cmp, .tmp2x, .memia(.src1x, .tmp0, .add_size), .ui(switch (cc) { // compare against rhs; the immediate is the predicate
+                                    else => unreachable,
+                                    .e => 0b00000, // predicate 0: equal
+                                    .ne => 0b00100, // predicate 4: not equal
+                                }), ._ },
+                                .{ ._, ._ps, .movmsk, .tmp3d, .tmp2x, ._, ._ }, // tmp3 = 4-bit mask of the first half
+                                .{ ._, ._ps, .mova, .tmp2x, .memiad(.src0x, .tmp0, .add_size, 16), ._, ._ }, // load the next 16 bytes of lhs (displacement 16)
+                                .{ ._, ._ps, .cmp, .tmp2x, .memiad(.src1x, .tmp0, .add_size, 16), .ui(switch (cc) { // compare the second half the same way
+                                    else => unreachable,
+                                    .e => 0b00000, // predicate 0: equal
+                                    .ne => 0b00100, // predicate 4: not equal
+                                }), ._ },
+                                .{ ._, ._ps, .movmsk, .tmp4d, .tmp2x, ._, ._ }, // tmp4 = 4-bit mask of the second half
+                                .{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ }, // move it to bits 4..7
+                                .{ ._, ._, .@"or", .tmp3b, .tmp4b, ._, ._ }, // merge both halves into one byte
+                                .{ ._, ._, .mov, .lea(.byte, .tmp1), .tmp3b, ._, ._ }, // write that byte through tmp1
+                                .{ ._, ._, .lea, .tmp1p, .lead(.none, .tmp1, 1), ._, ._ }, // tmp1 += 1 via lea
+                                .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, // advance the offset by one chunk
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat until the add carries
+                            } },
+                        }, .{
+                            .required_features = .{ .sse, null, null, null }, // SSE lowering of ==/!= for f32 vectors sized in whole 16-byte (xword) chunks
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both operands are accessed through memory operands below
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte index into both sources
+                                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp1: pointer to the next result mask byte
+                                .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, // tmp2: chunk being compared
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp3: low-nibble lane mask
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp4: high-nibble lane mask
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem}, // the bit-mask result is written to memory
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // seed the index; presumably -size so it counts up to zero (TODO confirm .sa/.sub_size)
+                                .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ }, // tmp1 = address of the destination
+                                .{ ._, ._mp, .j, .@"1f", ._, ._, ._ }, // enter the loop at 1: so the first chunk becomes a low nibble
+                                .{ .@"0:", ._ps, .mova, .tmp2x, .memiad(.src0x, .tmp0, .add_size, -16), ._, ._ }, // 0: index was already advanced by 32, so -16 selects the chunk after the pending low nibble
+                                .{ ._, ._ps, .cmp, .tmp2x, .memiad(.src1x, .tmp0, .add_size, -16), .ui(switch (cc) { // immediate is the SSE compare predicate
+                                    else => unreachable,
+                                    .e => 0b00000, // EQ (ordered)
+                                    .ne => 0b00100, // NEQ (unordered)
+                                }), ._ },
+                                .{ ._, ._ps, .movmsk, .tmp4d, .tmp2x, ._, ._ }, // tmp4 = 4-bit lane mask
+                                .{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ }, // move it to the high nibble
+                                .{ ._, ._, .@"or", .tmp3b, .tmp4b, ._, ._ }, // merge with the pending low nibble
+                                .{ ._, ._, .mov, .lea(.byte, .tmp1), .tmp3b, ._, ._ }, // store the completed mask byte
+                                .{ ._, ._, .lea, .tmp1p, .lead(.none, .tmp1, 1), ._, ._ }, // advance the destination pointer by one byte
+                                .{ .@"1:", ._ps, .mova, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // 1: load the chunk that supplies the low nibble
+                                .{ ._, ._ps, .cmp, .tmp2x, .memia(.src1x, .tmp0, .add_size), .ui(switch (cc) {
+                                    else => unreachable,
+                                    .e => 0b00000, // EQ (ordered)
+                                    .ne => 0b00100, // NEQ (unordered)
+                                }), ._ },
+                                .{ ._, ._ps, .movmsk, .tmp3d, .tmp2x, ._, ._ }, // tmp3 = 4-bit lane mask, kept pending
+                                .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, // step over two chunks; a carry out ends the loop
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // no carry: more chunks remain
+                                .{ ._, ._, .mov, .lea(.byte, .tmp1), .tmp3b, ._, ._ }, // final byte holds only a low nibble. NOTE(review): this shape assumes an odd chunk count; even counts are presumably matched by an earlier pattern - confirm
+                            } },
+                        }, .{
+                            .required_features = .{ .avx, .slow_incdec, null, null }, // AVX lowering of ==/!= for f64 vectors sized in whole 64-byte (zword) chunks; uses lea instead of inc
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .zword, .is = .qword } },
+                                .{ .multiple_scalar_float = .{ .of = .zword, .is = .qword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both operands are accessed through memory operands below
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte index into both sources
+                                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp1: pointer to the next result mask byte
+                                .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } }, // tmp2: first 32-byte half of the chunk
+                                .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } }, // tmp3: second 32-byte half of the chunk
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp4: low-nibble lane mask / finished byte
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp5: high-nibble lane mask
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem}, // the bit-mask result is written to memory
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // seed the index; presumably -size so it counts up to zero (TODO confirm .sa/.sub_size)
+                                .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ }, // tmp1 = address of the destination
+                                .{ .@"0:", .v_pd, .mova, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, // 0: load bytes [0, 32) of the current chunk
+                                .{ ._, .v_pd, .mova, .tmp3y, .memiad(.src0y, .tmp0, .add_size, 32), ._, ._ }, // load bytes [32, 64) of the current chunk
+                                .{ ._, .v_pd, .cmp, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), .ui(switch (cc) { // immediate is the compare predicate
+                                    else => unreachable,
+                                    .e => 0b00000, // EQ (ordered)
+                                    .ne => 0b00100, // NEQ (unordered)
+                                }) },
+                                .{ ._, .v_pd, .cmp, .tmp3y, .tmp3y, .memiad(.src1y, .tmp0, .add_size, 32), .ui(switch (cc) {
+                                    else => unreachable,
+                                    .e => 0b00000, // EQ (ordered)
+                                    .ne => 0b00100, // NEQ (unordered)
+                                }) },
+                                .{ ._, .v_pd, .movmsk, .tmp4d, .tmp2y, ._, ._ }, // tmp4 = 4-bit lane mask of the first half
+                                .{ ._, .v_pd, .movmsk, .tmp5d, .tmp3y, ._, ._ }, // tmp5 = 4-bit lane mask of the second half
+                                .{ ._, ._l, .sh, .tmp5b, .ui(4), ._, ._ }, // move the second mask to the high nibble
+                                .{ ._, ._, .@"or", .tmp4b, .tmp5b, ._, ._ }, // combine into one byte covering eight elements
+                                .{ ._, ._, .mov, .lea(.byte, .tmp1), .tmp4b, ._, ._ }, // store the mask byte
+                                .{ ._, ._, .lea, .tmp1p, .lead(.none, .tmp1, 1), ._, ._ }, // advance the destination pointer by one byte
+                                .{ ._, ._, .add, .tmp0p, .si(64), ._, ._ }, // two 32-byte loads were consumed, so step by 64 (a step of 32 would overlap and run past the operands)
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // no carry: more chunks remain
+                            } },
+                        }, .{
+                            .required_features = .{ .avx, null, null, null }, // AVX lowering of ==/!= for f64 vectors sized in whole 64-byte (zword) chunks
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .zword, .is = .qword } },
+                                .{ .multiple_scalar_float = .{ .of = .zword, .is = .qword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both operands are accessed through memory operands below
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte index into both sources
+                                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp1: pointer to the next result mask byte
+                                .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } }, // tmp2: first 32-byte half of the chunk
+                                .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } }, // tmp3: second 32-byte half of the chunk
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp4: low-nibble lane mask / finished byte
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp5: high-nibble lane mask
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem}, // the bit-mask result is written to memory
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // seed the index; presumably -size so it counts up to zero (TODO confirm .sa/.sub_size)
+                                .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ }, // tmp1 = address of the destination
+                                .{ .@"0:", .v_pd, .mova, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, // 0: load bytes [0, 32) of the current chunk
+                                .{ ._, .v_pd, .mova, .tmp3y, .memiad(.src0y, .tmp0, .add_size, 32), ._, ._ }, // load bytes [32, 64) of the current chunk
+                                .{ ._, .v_pd, .cmp, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), .ui(switch (cc) { // immediate is the compare predicate
+                                    else => unreachable,
+                                    .e => 0b00000, // EQ (ordered)
+                                    .ne => 0b00100, // NEQ (unordered)
+                                }) },
+                                .{ ._, .v_pd, .cmp, .tmp3y, .tmp3y, .memiad(.src1y, .tmp0, .add_size, 32), .ui(switch (cc) {
+                                    else => unreachable,
+                                    .e => 0b00000, // EQ (ordered)
+                                    .ne => 0b00100, // NEQ (unordered)
+                                }) },
+                                .{ ._, .v_pd, .movmsk, .tmp4d, .tmp2y, ._, ._ }, // tmp4 = 4-bit lane mask of the first half
+                                .{ ._, .v_pd, .movmsk, .tmp5d, .tmp3y, ._, ._ }, // tmp5 = 4-bit lane mask of the second half
+                                .{ ._, ._l, .sh, .tmp5b, .ui(4), ._, ._ }, // move the second mask to the high nibble
+                                .{ ._, ._, .@"or", .tmp4b, .tmp5b, ._, ._ }, // combine into one byte covering eight elements
+                                .{ ._, ._, .mov, .lea(.byte, .tmp1), .tmp4b, ._, ._ }, // store the mask byte
+                                .{ ._, ._c, .in, .tmp1p, ._, ._, ._ }, // advance the destination pointer by one byte (pointer-sized, matching the lea that initialised tmp1)
+                                .{ ._, ._, .add, .tmp0p, .si(64), ._, ._ }, // two 32-byte loads were consumed, so step by 64 (a step of 32 would overlap and run past the operands)
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // no carry: more chunks remain
+                            } },
+                        }, .{
+                            .required_features = .{ .avx, null, null, null }, // AVX lowering of ==/!= for f64 vectors sized in whole 32-byte (yword) chunks
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .yword, .is = .qword } },
+                                .{ .multiple_scalar_float = .{ .of = .yword, .is = .qword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both operands are accessed through memory operands below
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte index into both sources
+                                .{ .type = .u32, .kind = .{ .reg = .rcx } }, // tmp1: number of result bits produced; pinned to rcx so cl can be the rotate count
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp2: mask byte being accumulated
+                                .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } }, // tmp3: chunk being compared
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp4: lane mask, later reused as the destination byte index
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem}, // the bit-mask result is written to memory
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // seed the index; presumably -size so it counts up to zero (TODO confirm .sa/.sub_size)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // bit count = 0
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // accumulator = 0
+                                .{ .@"0:", .v_pd, .mova, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, // 0: load the next 32-byte chunk of src0
+                                .{ ._, .v_pd, .cmp, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), .ui(switch (cc) { // immediate is the compare predicate
+                                    else => unreachable,
+                                    .e => 0b00000, // EQ (ordered)
+                                    .ne => 0b00100, // NEQ (unordered)
+                                }) },
+                                .{ ._, .v_pd, .movmsk, .tmp4d, .tmp3y, ._, ._ }, // tmp4 = 4-bit lane mask
+                                .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, // rotate left by the bit count to its position within the current byte
+                                .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, // merge into the accumulator
+                                .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, // bit count += 4
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // low three bits zero means a whole byte was just completed
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // byte not complete yet: skip the store
+                                .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp4d, .ui(3), ._, ._ }, // tmp4 = bit count / 8
+                                .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, // store the finished byte at index count/8 - 1
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // reset the accumulator
+                                .{ .@"1:", ._, .add, .tmp0p, .si(32), ._, ._ }, // 1: step to the next chunk; a carry out ends the loop
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // no carry: more chunks remain
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // any bits pending in a partial byte?
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // none: skip the trailing store. NOTE(review): forward label 0 is not defined inside this sequence - presumably resolves to its end, confirm
+                                .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp4d, .ui(3), ._, ._ }, // tmp4 = bit count / 8
+                                .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, // store the partial final byte
+                            } },
+                        }, .{
+                            .required_features = .{ .sse2, null, null, null }, // SSE2 lowering of ==/!= for f64 vectors sized in whole 16-byte (xword) chunks
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both operands are accessed through memory operands below
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte index into both sources
+                                .{ .type = .u32, .kind = .{ .reg = .rcx } }, // tmp1: number of result bits produced; pinned to rcx so cl can be the rotate count
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp2: mask byte being accumulated
+                                .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, // tmp3: chunk being compared
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp4: lane mask, later reused as the destination byte index
+                                .unused,
+                                .unused,
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem}, // the bit-mask result is written to memory
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // seed the index; presumably -size so it counts up to zero (TODO confirm .sa/.sub_size)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // bit count = 0
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // accumulator = 0
+                                .{ .@"0:", ._pd, .mova, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // 0: load the next 16-byte chunk of src0
+                                .{ ._, ._pd, .cmp, .tmp3x, .memia(.src1x, .tmp0, .add_size), .ui(switch (cc) { // immediate is the compare predicate
+                                    else => unreachable,
+                                    .e => 0b00000, // EQ (ordered)
+                                    .ne => 0b00100, // NEQ (unordered)
+                                }), ._ },
+                                .{ ._, ._pd, .movmsk, .tmp4d, .tmp3x, ._, ._ }, // tmp4 = 2-bit lane mask
+                                .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, // rotate left by the bit count to its position within the current byte
+                                .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, // merge into the accumulator
+                                .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, // bit count += 2
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // low three bits zero means a whole byte was just completed
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // byte not complete yet: skip the store
+                                .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp4d, .ui(3), ._, ._ }, // tmp4 = bit count / 8
+                                .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, // store the finished byte at index count/8 - 1
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // reset the accumulator
+                                .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, // 1: step to the next chunk; a carry out ends the loop
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // no carry: more chunks remain
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // any bits pending in a partial byte?
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // none: skip the trailing store. NOTE(review): forward label 0 is not defined inside this sequence - presumably resolves to its end, confirm
+                                .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp4d, .ui(3), ._, ._ }, // tmp4 = bit count / 8
+                                .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, // store the partial final byte
+                            } },
+                        }, .{
+                            .required_features = .{ .x87, .cmov, .slow_incdec, null }, // x87 lowering of ==/!= for f64 vectors, one element per iteration; uses lea instead of inc
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } },
+                                .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both operands are accessed through memory operands below
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte index into both sources
+                                .{ .type = .u32, .kind = .{ .reg = .rcx } }, // tmp1: number of result bits produced; pinned to rcx so cl can be the rotate count
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp2: mask byte being accumulated
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp3: result bit of the current element, later the destination byte index
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp4: ordered/unordered bit from the parity flag
+                                .{ .type = .f80, .kind = .{ .reg = .st6 } }, // tmp5: NOTE(review): st6/st7 are presumably reserved so the two loads below have room on the x87 stack - confirm
+                                .{ .type = .f80, .kind = .{ .reg = .st7 } }, // tmp6
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem}, // the bit-mask result is written to memory
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // seed the index; presumably -size so it counts up to zero (TODO confirm .sa/.sub_size)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // bit count = 0
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // accumulator = 0
+                                .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ }, // 0: clear tmp3 before its low byte is written by set
+                                .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, // clear tmp4 likewise
+                                .{ ._, .f_, .ld, .memia(.src1q, .tmp0, .add_size), ._, ._, ._ }, // push the src1 element
+                                .{ ._, .f_, .ld, .memia(.src0q, .tmp0, .add_size), ._, ._, ._ }, // push the src0 element
+                                .{ ._, .f_p, .ucomi, .tmp5t, .tmp6t, ._, ._ }, // unordered compare into eflags, popping one operand (presumably fucomip)
+                                .{ ._, .f_p, .st, .tmp6t, ._, ._, ._ }, // pop the remaining operand (presumably fstp)
+                                .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ }, // tmp3 = raw condition from the flags
+                                .{ ._, switch (cc) { // the parity flag reports an unordered (NaN) comparison
+                                    else => unreachable,
+                                    .e => ._np,
+                                    .ne => ._p,
+                                }, .set, .tmp4b, ._, ._, ._ },
+                                .{ ._, ._, switch (cc) { // equal must also be ordered; not-equal also holds when unordered
+                                    else => unreachable,
+                                    .e => .@"and",
+                                    .ne => .@"or",
+                                }, .tmp3b, .tmp4b, ._, ._ },
+                                .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, // rotate the result bit left by the bit count to its position within the current byte
+                                .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, // merge into the accumulator
+                                .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, // bit count += 1
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // low three bits zero means a whole byte was just completed
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // byte not complete yet: skip the store
+                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ }, // tmp3 = bit count / 8
+                                .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, // store the finished byte at index count/8 - 1
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // reset the accumulator
+                                .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ }, // 1: step to the next 8-byte element; a carry out ends the loop
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // no carry: more elements remain
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // any bits pending in a partial byte?
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // none: skip the trailing store. NOTE(review): forward label 0 is not defined inside this sequence - presumably resolves to its end, confirm
+                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ }, // tmp3 = bit count / 8
+                                .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, // store the partial final byte
+                            } },
+                        }, .{
+                            .required_features = .{ .x87, .cmov, null, null }, // x87 lowering of ==/!= for f64 vectors, one element per iteration
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } },
+                                .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both operands are accessed through memory operands below
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte index into both sources
+                                .{ .type = .u32, .kind = .{ .reg = .rcx } }, // tmp1: number of result bits produced; pinned to rcx so cl can be the rotate count
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp2: mask byte being accumulated
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp3: result bit of the current element, later the destination byte index
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp4: ordered/unordered bit from the parity flag
+                                .{ .type = .f80, .kind = .{ .reg = .st6 } }, // tmp5: NOTE(review): st6/st7 are presumably reserved so the two loads below have room on the x87 stack - confirm
+                                .{ .type = .f80, .kind = .{ .reg = .st7 } }, // tmp6
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem}, // the bit-mask result is written to memory
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // seed the index; presumably -size so it counts up to zero (TODO confirm .sa/.sub_size)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // bit count = 0
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // accumulator = 0
+                                .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ }, // 0: clear tmp3 before its low byte is written by set
+                                .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, // clear tmp4 likewise
+                                .{ ._, .f_, .ld, .memia(.src1q, .tmp0, .add_size), ._, ._, ._ }, // push the src1 element
+                                .{ ._, .f_, .ld, .memia(.src0q, .tmp0, .add_size), ._, ._, ._ }, // push the src0 element
+                                .{ ._, .f_p, .ucomi, .tmp5t, .tmp6t, ._, ._ }, // unordered compare into eflags, popping one operand (presumably fucomip)
+                                .{ ._, .f_p, .st, .tmp6t, ._, ._, ._ }, // pop the remaining operand (presumably fstp)
+                                .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ }, // tmp3 = raw condition from the flags
+                                .{ ._, switch (cc) { // the parity flag reports an unordered (NaN) comparison
+                                    else => unreachable,
+                                    .e => ._np,
+                                    .ne => ._p,
+                                }, .set, .tmp4b, ._, ._, ._ },
+                                .{ ._, ._, switch (cc) { // equal must also be ordered; not-equal also holds when unordered
+                                    else => unreachable,
+                                    .e => .@"and",
+                                    .ne => .@"or",
+                                }, .tmp3b, .tmp4b, ._, ._ },
+                                .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, // rotate the result bit left by the bit count to its position within the current byte
+                                .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, // merge into the accumulator
+                                .{ ._, ._c, .in, .tmp1d, ._, ._, ._ }, // bit count += 1
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // low three bits zero means a whole byte was just completed
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // byte not complete yet: skip the store
+                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ }, // tmp3 = bit count / 8
+                                .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, // store the finished byte at index count/8 - 1
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // reset the accumulator
+                                .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ }, // 1: step to the next 8-byte element; a carry out ends the loop
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // no carry: more elements remain
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // any bits pending in a partial byte?
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // none: skip the trailing store. NOTE(review): forward label 0 is not defined inside this sequence - presumably resolves to its end, confirm
+                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ }, // tmp3 = bit count / 8
+                                .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, // store the partial final byte
+                            } },
+                        }, .{
+                            .required_features = .{ .x87, null, null, null }, // x87 lowering of ==/!= for f64 vectors that reads the FPU status word instead of eflags
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } },
+                                .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both operands are accessed through memory operands below
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte index into both sources
+                                .{ .type = .u32, .kind = .{ .reg = .rcx } }, // tmp1: number of result bits produced; pinned to rcx so cl can be the rotate count
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp2: mask byte being accumulated
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp3: result bit of the current element, later the destination byte index
+                                .{ .type = .f80, .kind = .{ .reg = .st6 } }, // tmp4: NOTE(review): st6/st7 are presumably reserved so the two loads below have room on the x87 stack - confirm
+                                .{ .type = .f80, .kind = .{ .reg = .st7 } }, // tmp5
+                                .{ .type = .u8, .kind = .{ .reg = .ah } }, // tmp6: pinned to ah, the high byte of the status word once it is stored to ax
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem}, // the bit-mask result is written to memory
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // seed the index; presumably -size so it counts up to zero (TODO confirm .sa/.sub_size)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // bit count = 0
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // accumulator = 0
+                                .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ }, // 0: clear tmp3 before its low byte is written by set
+                                .{ ._, .f_, .ld, .memia(.src1q, .tmp0, .add_size), ._, ._, ._ }, // push the src1 element
+                                .{ ._, .f_, .ld, .memia(.src0q, .tmp0, .add_size), ._, ._, ._ }, // push the src0 element
+                                .{ ._, .f_pp, .ucom, ._, ._, ._, ._ }, // unordered compare that pops both operands (presumably fucompp)
+                                .{ ._, .fn_sw, .st, .tmp6w, ._, ._, ._ }, // store the FPU status word into the 16-bit register containing tmp6 (presumably fnstsw ax)
+                                .{ ._, ._, .xor, .tmp6b, .si(0b0_1_000_000), ._, ._ }, // flip bit 6 of the high status byte (condition code C3)
+                                .{ ._, ._, .@"test", .tmp6b, .si(0b0_1_000_100), ._, ._ }, // test flipped C3 (bit 6) and C2 (bit 2): zero only for an ordered-equal result
+                                .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ }, // tmp3 = result bit for this element
+                                .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, // rotate the result bit left by the bit count to its position within the current byte
+                                .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, // merge into the accumulator
+                                .{ ._, ._c, .in, .tmp1d, ._, ._, ._ }, // bit count += 1
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // low three bits zero means a whole byte was just completed
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // byte not complete yet: skip the store
+                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ }, // tmp3 = bit count / 8
+                                .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, // store the finished byte at index count/8 - 1
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // reset the accumulator
+                                .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ }, // 1: step to the next 8-byte element; a carry out ends the loop
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // no carry: more elements remain
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // any bits pending in a partial byte?
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // none: skip the trailing store. NOTE(review): forward label 0 is not defined inside this sequence - presumably resolves to its end, confirm
+                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ }, // tmp3 = bit count / 8
+                                .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, // store the partial final byte
+                            } },
+                        }, .{
+                            .required_features = .{ .x87, .cmov, .slow_incdec, null }, // x87 lowering of ==/!= for f80 vectors (tbyte elements in 16-byte slots); uses lea instead of inc
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both operands are accessed through memory operands below
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte index into both sources
+                                .{ .type = .u32, .kind = .{ .reg = .rcx } }, // tmp1: number of result bits produced; pinned to rcx so cl can be the rotate count
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp2: mask byte being accumulated
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp3: result bit of the current element, later the destination byte index
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp4: ordered/unordered bit from the parity flag
+                                .{ .type = .f80, .kind = .{ .reg = .st6 } }, // tmp5: NOTE(review): st6/st7 are presumably reserved so the two loads below have room on the x87 stack - confirm
+                                .{ .type = .f80, .kind = .{ .reg = .st7 } }, // tmp6
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem}, // the bit-mask result is written to memory
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // seed the index; presumably -size so it counts up to zero (TODO confirm .sa/.sub_size)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // bit count = 0
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // accumulator = 0
+                                .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ }, // 0: clear tmp3 before its low byte is written by set
+                                .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, // clear tmp4 likewise
+                                .{ ._, .f_, .ld, .memia(.src1t, .tmp0, .add_size), ._, ._, ._ }, // push the src1 element
+                                .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ }, // push the src0 element
+                                .{ ._, .f_p, .ucomi, .tmp5t, .tmp6t, ._, ._ }, // unordered compare into eflags, popping one operand (presumably fucomip)
+                                .{ ._, .f_p, .st, .tmp6t, ._, ._, ._ }, // pop the remaining operand (presumably fstp)
+                                .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ }, // tmp3 = raw condition from the flags
+                                .{ ._, switch (cc) { // the parity flag reports an unordered (NaN) comparison
+                                    else => unreachable,
+                                    .e => ._np,
+                                    .ne => ._p,
+                                }, .set, .tmp4b, ._, ._, ._ },
+                                .{ ._, ._, switch (cc) { // equal must also be ordered; not-equal also holds when unordered
+                                    else => unreachable,
+                                    .e => .@"and",
+                                    .ne => .@"or",
+                                }, .tmp3b, .tmp4b, ._, ._ },
+                                .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, // rotate the result bit left by the bit count to its position within the current byte
+                                .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, // merge into the accumulator
+                                .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, // bit count += 1
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // low three bits zero means a whole byte was just completed
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // byte not complete yet: skip the store
+                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ }, // tmp3 = bit count / 8
+                                .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, // store the finished byte at index count/8 - 1
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // reset the accumulator
+                                .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, // 1: step to the next 16-byte element slot; a carry out ends the loop
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // no carry: more elements remain
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // any bits pending in a partial byte?
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // none: skip the trailing store. NOTE(review): forward label 0 is not defined inside this sequence - presumably resolves to its end, confirm
+                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ }, // tmp3 = bit count / 8
+                                .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, // store the partial final byte
+                            } },
+                        }, .{
+                            .required_features = .{ .x87, .cmov, null, null }, // x87 compare through `.f_p, .ucomi` (presumably `fucomip`), counting elements with `inc`
+                            .src_constraints = .{ // both operands: vectors of .tbyte floats held in .xword slots
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both sources are addressed in memory
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running offset into both sources
+                                .{ .type = .u32, .kind = .{ .reg = .rcx } }, // tmp1: element counter; in rcx so its low byte can be the rotate count
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp2: byte of result bits being accumulated
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp3: this element's result bit
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp4: this element's parity-flag result
+                                .{ .type = .f80, .kind = .{ .reg = .st6 } }, // tmp5: x87 operand of the compare
+                                .{ .type = .f80, .kind = .{ .reg = .st7 } }, // tmp6: x87 operand of the compare, popped afterwards
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem}, // the packed result bits are written to memory
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = `.sub_size` of src0 (presumably the negated byte size, so the offset counts up towards zero)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // element counter = 0
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // accumulated byte = 0
+                                .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ }, // loop head: clear this element's result
+                                .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
+                                .{ ._, .f_, .ld, .memia(.src1t, .tmp0, .add_size), ._, ._, ._ }, // load the src1 element
+                                .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ }, // load the src0 element
+                                .{ ._, .f_p, .ucomi, .tmp5t, .tmp6t, ._, ._ }, // compare into eflags and pop one operand
+                                .{ ._, .f_p, .st, .tmp6t, ._, ._, ._ }, // pop the other operand
+                                .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ }, // tmp3b = 1 when the flags satisfy cc
+                                .{ ._, switch (cc) { // tmp4b = parity clear for .e, parity set for .ne (presumably PF marks an unordered compare)
+                                    else => unreachable,
+                                    .e => ._np,
+                                    .ne => ._p,
+                                }, .set, .tmp4b, ._, ._, ._ },
+                                .{ ._, ._, switch (cc) { // .e: both must hold; .ne: either is enough
+                                    else => unreachable,
+                                    .e => .@"and",
+                                    .ne => .@"or",
+                                }, .tmp3b, .tmp4b, ._, ._ },
+                                .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, // rotate the bit left by the counter's low byte to reach its position in the byte
+                                .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, // merge into the accumulated byte
+                                .{ ._, ._c, .in, .tmp1d, ._, ._, ._ }, // counter += 1
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // has a whole byte been gathered?
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // not yet: skip the store
+                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ }, // counter / 8 = bytes completed so far
+                                .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, // store the finished byte at index (counter / 8) - 1
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // start a fresh byte
+                                .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, // advance 16 bytes to the next element
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // keep looping until the add carries
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // any bits left over after the last full byte?
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // none: done
+                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
+                                .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, // flush the partial byte at index counter / 8
+                            } },
+                        }, .{
+                            .required_features = .{ .x87, null, null, null }, // x87-only variant: compares with `.f_pp, .ucom` and inspects the stored status word instead of eflags
+                            .src_constraints = .{ // both operands: vectors of .tbyte floats held in .xword slots
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both sources are addressed in memory
+                            },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running offset into both sources
+                                .{ .type = .u32, .kind = .{ .reg = .rcx } }, // tmp1: element counter; in rcx so its low byte can be the rotate count
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp2: byte of result bits being accumulated
+                                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp3: this element's result bit
+                                .{ .type = .f80, .kind = .{ .reg = .st6 } }, // tmp4: reserved x87 slot; no instruction below names it
+                                .{ .type = .f80, .kind = .{ .reg = .st7 } }, // tmp5: reserved x87 slot; no instruction below names it
+                                .{ .type = .u8, .kind = .{ .reg = .ah } }, // tmp6: receives the status word (as tmp6w) and is tested as tmp6b
+                                .unused,
+                                .unused,
+                            },
+                            .dst_temps = .{.mem}, // the packed result bits are written to memory
+                            .clobbers = .{ .eflags = true },
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = `.sub_size` of src0 (presumably the negated byte size, so the offset counts up towards zero)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // element counter = 0
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // accumulated byte = 0
+                                .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ }, // loop head: clear this element's result
+                                .{ ._, .f_, .ld, .memia(.src1t, .tmp0, .add_size), ._, ._, ._ }, // load the src1 element
+                                .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ }, // load the src0 element
+                                .{ ._, .f_pp, .ucom, ._, ._, ._, ._ }, // compare the two loaded values and pop both
+                                .{ ._, .fn_sw, .st, .tmp6w, ._, ._, ._ }, // store the x87 status word
+                                .{ ._, ._, .xor, .tmp6b, .si(0b0_1_000_000), ._, ._ }, // flip bit 6 (NOTE(review): presumably C3 of the status word's high byte - confirm)
+                                .{ ._, ._, .@"test", .tmp6b, .si(0b0_1_000_100), ._, ._ }, // zero only if bit 6 was set and bit 2 clear (NOTE(review): presumably "equal and ordered" - confirm)
+                                .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ }, // tmp3b = 1 when the test result satisfies cc
+                                .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, // rotate the bit left by the counter's low byte to reach its position in the byte
+                                .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, // merge into the accumulated byte
+                                .{ ._, ._c, .in, .tmp1d, ._, ._, ._ }, // counter += 1
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // has a whole byte been gathered?
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // not yet: skip the store
+                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ }, // counter / 8 = bytes completed so far
+                                .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, // store the finished byte at index (counter / 8) - 1
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // start a fresh byte
+                                .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, // advance 16 bytes to the next element
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // keep looping until the add carries
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, // any bits left over after the last full byte?
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // none: done
+                                .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp3d, .ui(3), ._, ._ },
+                                .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, // flush the partial byte at index counter / 8
+                            } },
+                        }, .{
+                            .required_features = .{ .avx, .slow_incdec, null, null }, // 16-byte float elements compared by a library call; AVX loads, counter bumped with `lea`
+                            .dst_constraints = .{.{ .bool_vec = .dword }}, // result is a bool vector that fits a dword
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both sources are addressed in memory
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running offset into both sources
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: element counter
+                                .{ .type = .f128, .kind = .{ .reg = .xmm0 } }, // tmp2: src0 element (presumably the first call argument)
+                                .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, // tmp3: src1 element (presumably the second call argument)
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) { // tmp4: comparison routine to call
+                                    else => unreachable,
+                                    .e => "__eqtf2",
+                                    .ne => "__netf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } }, // tmp5: tested after the call (presumably its return value)
+                                .{ .type = .u8, .kind = .{ .reg = .cl } }, // tmp6: shift count
+                                .{ .type = .u32, .kind = .{ .reg = .edx } }, // tmp7: this element's result bit
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .rc = .general_purpose }}, // the whole mask is built in one general-purpose register
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, // flags plus the caller-preserved set of `.ccc` (presumably the C calling convention)
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, // mask = 0
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = `.sub_size` of src0 (presumably the negated byte size, so the offset counts up towards zero)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // element counter = 0
+                                .{ .@"0:", .v_dqa, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // loop head: load the src0 element
+                                .{ ._, .v_dqa, .mov, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, // load the src1 element
+                                .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, // call the comparison routine
+                                .{ ._, ._, .xor, .tmp7d, .tmp7d, ._, ._ }, // clear this element's result
+                                .{ ._, ._, .@"test", .tmp5d, .tmp5d, ._, ._ }, // set flags from tmp5
+                                .{ ._, .fromCondition(cc), .set, .tmp7b, ._, ._, ._ }, // tmp7b = 1 when the flags satisfy cc
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ }, // shift count = element counter
+                                .{ ._, ._l, .sh, .tmp7d, .tmp6b, ._, ._ }, // move the bit to its position
+                                .{ ._, ._, .@"or", .dst0d, .tmp7d, ._, ._ }, // merge into the mask
+                                .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, // counter += 1 without using `inc`
+                                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance 16 bytes to the next element
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // keep looping until the add carries
+                            } },
+                        }, .{
+                            .required_features = .{ .avx, null, null, null }, // 16-byte float elements compared by a library call; AVX loads, counter bumped with `inc`, so `.slow_incdec` must not be required here
+                            .dst_constraints = .{.{ .bool_vec = .dword }}, // result is a bool vector that fits a dword
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both sources are addressed in memory
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running offset into both sources
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: element counter
+                                .{ .type = .f128, .kind = .{ .reg = .xmm0 } }, // tmp2: src0 element (presumably the first call argument)
+                                .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, // tmp3: src1 element (presumably the second call argument)
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) { // tmp4: comparison routine to call
+                                    else => unreachable,
+                                    .e => "__eqtf2",
+                                    .ne => "__netf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } }, // tmp5: tested after the call (presumably its return value)
+                                .{ .type = .u8, .kind = .{ .reg = .cl } }, // tmp6: shift count
+                                .{ .type = .u32, .kind = .{ .reg = .edx } }, // tmp7: this element's result bit
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .rc = .general_purpose }}, // the whole mask is built in one general-purpose register
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, // flags plus the caller-preserved set of `.ccc` (presumably the C calling convention)
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, // mask = 0
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = `.sub_size` of src0 (presumably the negated byte size, so the offset counts up towards zero)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // element counter = 0
+                                .{ .@"0:", .v_dqa, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // loop head: load the src0 element
+                                .{ ._, .v_dqa, .mov, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, // load the src1 element
+                                .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, // call the comparison routine
+                                .{ ._, ._, .xor, .tmp7d, .tmp7d, ._, ._ }, // clear this element's result
+                                .{ ._, ._, .@"test", .tmp5d, .tmp5d, ._, ._ }, // set flags from tmp5
+                                .{ ._, .fromCondition(cc), .set, .tmp7b, ._, ._, ._ }, // tmp7b = 1 when the flags satisfy cc
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ }, // shift count = element counter
+                                .{ ._, ._l, .sh, .tmp7d, .tmp6b, ._, ._ }, // move the bit to its position
+                                .{ ._, ._, .@"or", .dst0d, .tmp7d, ._, ._ }, // merge into the mask
+                                .{ ._, ._c, .in, .tmp1d, ._, ._, ._ }, // counter += 1
+                                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance 16 bytes to the next element
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // keep looping until the add carries
+                            } },
+                        }, .{
+                            .required_features = .{ .sse2, .slow_incdec, null, null }, // 16-byte float elements compared by a library call; SSE2 loads, counter bumped with `lea`
+                            .dst_constraints = .{.{ .bool_vec = .dword }}, // result is a bool vector that fits a dword
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both sources are addressed in memory
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running offset into both sources
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: element counter
+                                .{ .type = .f128, .kind = .{ .reg = .xmm0 } }, // tmp2: src0 element (presumably the first call argument)
+                                .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, // tmp3: src1 element (presumably the second call argument)
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) { // tmp4: comparison routine to call
+                                    else => unreachable,
+                                    .e => "__eqtf2",
+                                    .ne => "__netf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } }, // tmp5: tested after the call (presumably its return value)
+                                .{ .type = .u8, .kind = .{ .reg = .cl } }, // tmp6: shift count
+                                .{ .type = .u32, .kind = .{ .reg = .edx } }, // tmp7: this element's result bit
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .rc = .general_purpose }}, // the whole mask is built in one general-purpose register
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, // flags plus the caller-preserved set of `.ccc` (presumably the C calling convention)
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, // mask = 0
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = `.sub_size` of src0 (presumably the negated byte size, so the offset counts up towards zero)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // element counter = 0
+                                .{ .@"0:", ._dqa, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // loop head: load the src0 element
+                                .{ ._, ._dqa, .mov, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, // load the src1 element
+                                .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, // call the comparison routine
+                                .{ ._, ._, .xor, .tmp7d, .tmp7d, ._, ._ }, // clear this element's result
+                                .{ ._, ._, .@"test", .tmp5d, .tmp5d, ._, ._ }, // set flags from tmp5
+                                .{ ._, .fromCondition(cc), .set, .tmp7b, ._, ._, ._ }, // tmp7b = 1 when the flags satisfy cc
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ }, // shift count = element counter
+                                .{ ._, ._l, .sh, .tmp7d, .tmp6b, ._, ._ }, // move the bit to its position
+                                .{ ._, ._, .@"or", .dst0d, .tmp7d, ._, ._ }, // merge into the mask
+                                .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, // counter += 1 without using `inc`
+                                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance 16 bytes to the next element
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // keep looping until the add carries
+                            } },
+                        }, .{
+                            .required_features = .{ .sse2, null, null, null }, // 16-byte float elements compared by a library call; SSE2 loads, counter bumped with `inc`, so `.slow_incdec` must not be required here
+                            .dst_constraints = .{.{ .bool_vec = .dword }}, // result is a bool vector that fits a dword
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both sources are addressed in memory
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running offset into both sources
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: element counter
+                                .{ .type = .f128, .kind = .{ .reg = .xmm0 } }, // tmp2: src0 element (presumably the first call argument)
+                                .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, // tmp3: src1 element (presumably the second call argument)
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) { // tmp4: comparison routine to call
+                                    else => unreachable,
+                                    .e => "__eqtf2",
+                                    .ne => "__netf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } }, // tmp5: tested after the call (presumably its return value)
+                                .{ .type = .u8, .kind = .{ .reg = .cl } }, // tmp6: shift count
+                                .{ .type = .u32, .kind = .{ .reg = .edx } }, // tmp7: this element's result bit
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .rc = .general_purpose }}, // the whole mask is built in one general-purpose register
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, // flags plus the caller-preserved set of `.ccc` (presumably the C calling convention)
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, // mask = 0
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = `.sub_size` of src0 (presumably the negated byte size, so the offset counts up towards zero)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // element counter = 0
+                                .{ .@"0:", ._dqa, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // loop head: load the src0 element
+                                .{ ._, ._dqa, .mov, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, // load the src1 element
+                                .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, // call the comparison routine
+                                .{ ._, ._, .xor, .tmp7d, .tmp7d, ._, ._ }, // clear this element's result
+                                .{ ._, ._, .@"test", .tmp5d, .tmp5d, ._, ._ }, // set flags from tmp5
+                                .{ ._, .fromCondition(cc), .set, .tmp7b, ._, ._, ._ }, // tmp7b = 1 when the flags satisfy cc
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ }, // shift count = element counter
+                                .{ ._, ._l, .sh, .tmp7d, .tmp6b, ._, ._ }, // move the bit to its position
+                                .{ ._, ._, .@"or", .dst0d, .tmp7d, ._, ._ }, // merge into the mask
+                                .{ ._, ._c, .in, .tmp1d, ._, ._, ._ }, // counter += 1
+                                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance 16 bytes to the next element
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // keep looping until the add carries
+                            } },
+                        }, .{
+                            .required_features = .{ .sse, .slow_incdec, null, null }, // 16-byte float elements compared by a library call; SSE `._ps, .mova` loads, counter bumped with `lea`
+                            .dst_constraints = .{.{ .bool_vec = .dword }}, // result is a bool vector that fits a dword
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both sources are addressed in memory
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running offset into both sources
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: element counter
+                                .{ .type = .f128, .kind = .{ .reg = .xmm0 } }, // tmp2: src0 element (presumably the first call argument)
+                                .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, // tmp3: src1 element (presumably the second call argument)
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) { // tmp4: comparison routine to call
+                                    else => unreachable,
+                                    .e => "__eqtf2",
+                                    .ne => "__netf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } }, // tmp5: tested after the call (presumably its return value)
+                                .{ .type = .u8, .kind = .{ .reg = .cl } }, // tmp6: shift count
+                                .{ .type = .u32, .kind = .{ .reg = .edx } }, // tmp7: this element's result bit
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .rc = .general_purpose }}, // the whole mask is built in one general-purpose register
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, // flags plus the caller-preserved set of `.ccc` (presumably the C calling convention)
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, // mask = 0
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = `.sub_size` of src0 (presumably the negated byte size, so the offset counts up towards zero)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // element counter = 0
+                                .{ .@"0:", ._ps, .mova, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // loop head: load the src0 element
+                                .{ ._, ._ps, .mova, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, // load the src1 element
+                                .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, // call the comparison routine
+                                .{ ._, ._, .xor, .tmp7d, .tmp7d, ._, ._ }, // clear this element's result
+                                .{ ._, ._, .@"test", .tmp5d, .tmp5d, ._, ._ }, // set flags from tmp5
+                                .{ ._, .fromCondition(cc), .set, .tmp7b, ._, ._, ._ }, // tmp7b = 1 when the flags satisfy cc
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ }, // shift count = element counter
+                                .{ ._, ._l, .sh, .tmp7d, .tmp6b, ._, ._ }, // move the bit to its position
+                                .{ ._, ._, .@"or", .dst0d, .tmp7d, ._, ._ }, // merge into the mask
+                                .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, // counter += 1 without using `inc`
+                                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance 16 bytes to the next element
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // keep looping until the add carries
+                            } },
+                        }, .{
+                            .required_features = .{ .sse, null, null, null }, // 16-byte float elements compared by a library call; only SSE `._ps, .mova` loads and `inc` are used, so neither `.sse2` nor `.slow_incdec` is required
+                            .dst_constraints = .{.{ .bool_vec = .dword }}, // result is a bool vector that fits a dword
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both sources are addressed in memory
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running offset into both sources
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: element counter
+                                .{ .type = .f128, .kind = .{ .reg = .xmm0 } }, // tmp2: src0 element (presumably the first call argument)
+                                .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, // tmp3: src1 element (presumably the second call argument)
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) { // tmp4: comparison routine to call
+                                    else => unreachable,
+                                    .e => "__eqtf2",
+                                    .ne => "__netf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } }, // tmp5: tested after the call (presumably its return value)
+                                .{ .type = .u8, .kind = .{ .reg = .cl } }, // tmp6: shift count
+                                .{ .type = .u32, .kind = .{ .reg = .edx } }, // tmp7: this element's result bit
+                                .unused,
+                            },
+                            .dst_temps = .{.{ .rc = .general_purpose }}, // the whole mask is built in one general-purpose register
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, // flags plus the caller-preserved set of `.ccc` (presumably the C calling convention)
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, // mask = 0
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = `.sub_size` of src0 (presumably the negated byte size, so the offset counts up towards zero)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // element counter = 0
+                                .{ .@"0:", ._ps, .mova, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // loop head: load the src0 element
+                                .{ ._, ._ps, .mova, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, // load the src1 element
+                                .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, // call the comparison routine
+                                .{ ._, ._, .xor, .tmp7d, .tmp7d, ._, ._ }, // clear this element's result
+                                .{ ._, ._, .@"test", .tmp5d, .tmp5d, ._, ._ }, // set flags from tmp5
+                                .{ ._, .fromCondition(cc), .set, .tmp7b, ._, ._, ._ }, // tmp7b = 1 when the flags satisfy cc
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ }, // shift count = element counter
+                                .{ ._, ._l, .sh, .tmp7d, .tmp6b, ._, ._ }, // move the bit to its position
+                                .{ ._, ._, .@"or", .dst0d, .tmp7d, ._, ._ }, // merge into the mask
+                                .{ ._, ._c, .in, .tmp1d, ._, ._, ._ }, // counter += 1
+                                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance 16 bytes to the next element
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // keep looping until the add carries
+                            } },
+                        }, .{
+                            .required_features = .{ .@"64bit", .avx, .slow_incdec, null }, // 16-byte float elements compared by a library call; mask gathered 64 bits at a time and stored to memory; counter bumped with `lea`
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } }, // both sources are addressed in memory
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: running offset into both sources
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: element counter
+                                .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp2: qword of result bits being accumulated
+                                .{ .type = .f128, .kind = .{ .reg = .xmm0 } }, // tmp3: src0 element (presumably the first call argument)
+                                .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, // tmp4: src1 element (presumably the second call argument)
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) { // tmp5: comparison routine to call
+                                    else => unreachable,
+                                    .e => "__eqtf2",
+                                    .ne => "__netf2",
+                                } } } },
+                                .{ .type = .i32, .kind = .{ .reg = .eax } }, // tmp6: tested after the call, then reused as the store index
+                                .{ .type = .u8, .kind = .{ .reg = .cl } }, // tmp7: shift count
+                                .{ .type = .u64, .kind = .{ .reg = .rdx } }, // tmp8: this element's result bit
+                            },
+                            .dst_temps = .{.mem}, // the packed result bits are written to memory
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, // flags plus the caller-preserved set of `.ccc` (presumably the C calling convention)
+                            .each = .{ .once = &.{
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, // tmp0 = `.sub_size` of src0 (presumably the negated byte size, so the offset counts up towards zero)
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // element counter = 0
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // accumulated qword = 0
+                                .{ .@"0:", .v_dqa, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, // loop head: load the src0 element
+                                .{ ._, .v_dqa, .mov, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, // load the src1 element
+                                .{ ._, ._, .call, .tmp5d, ._, ._, ._ }, // call the comparison routine
+                                .{ ._, ._, .xor, .tmp8d, .tmp8d, ._, ._ }, // clear this element's result
+                                .{ ._, ._, .@"test", .tmp6d, .tmp6d, ._, ._ }, // set flags from tmp6
+                                .{ ._, .fromCondition(cc), .set, .tmp8b, ._, ._, ._ }, // tmp8b = 1 when the flags satisfy cc
+                                .{ ._, ._, .mov, .tmp7d, .tmp1d, ._, ._ }, // shift count = element counter
+                                .{ ._, ._l, .sh, .tmp8q, .tmp7b, ._, ._ }, // move the bit to its position within the qword
+                                .{ ._, ._, .@"or", .tmp2q, .tmp8q, ._, ._ }, // merge into the accumulated qword
+                                .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, // counter += 1 without using `inc`
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ }, // has a whole qword (64 bits) been gathered?
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, // not yet: skip the store
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp6d, .ui(3), ._, ._ }, // counter / 8 = byte offset just past the finished qword
+                                .{ ._, ._, .mov, .memid(.dst0q, .tmp6, -8), .tmp2q, ._, ._ }, // store the finished qword 8 bytes before that offset
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, // start a fresh qword
+                                .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, // advance 16 bytes to the next element
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // keep looping until the add carries
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ }, // any bits left over after the last full qword?
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, // none: done
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp6d, .ui(6), ._, ._ }, // counter / 64 = index of the partial qword (the store below scales it by 8)
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp6), .tmp2q, ._, ._ }, // flush the partial qword
+                            } },
+                        }, .{
+                            // Element-wise `cc` comparison of two in-memory vectors of xword-sized floats
+                            // (AVX variant; the element counter is advanced with `inc`). Each pair of
+                            // elements is passed in xmm0/xmm1 to `__eqtf2`/`__netf2`, and one result bit
+                            // per element is packed into the in-memory destination, a qword at a time.
+                            .required_features = .{ .@"64bit", .avx, null, null },
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                // tmp0: byte offset into both sources; tmp1: element (= result bit) counter;
+                                // tmp2: result bits gathered for the qword currently being filled.
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                                // tmp3/tmp4: the two elements handed to the call; tmp5: the callee symbol.
+                                .{ .type = .f128, .kind = .{ .reg = .xmm0 } },
+                                .{ .type = .f128, .kind = .{ .reg = .xmm1 } },
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
+                                    else => unreachable,
+                                    .e => "__eqtf2",
+                                    .ne => "__netf2",
+                                } } } },
+                                // tmp6: tested right after the call, presumably its return value (later
+                                // reused as index scratch); tmp7: shift count; tmp8: the new result bit.
+                                .{ .type = .i32, .kind = .{ .reg = .eax } },
+                                .{ .type = .u8, .kind = .{ .reg = .cl } },
+                                .{ .type = .u64, .kind = .{ .reg = .rdx } },
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                // NOTE(review): presumably `.sub_size` starts tmp0 at minus the byte size of
+                                // src0, so that the `add ..., 16` below carries after the last element.
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+                                // Load the next pair of elements and call the comparison routine.
+                                .{ .@"0:", .v_dqa, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                                .{ ._, .v_dqa, .mov, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                                .{ ._, ._, .call, .tmp5d, ._, ._, ._ },
+                                // Reduce the tested result to a single bit and shift it left by the
+                                // element counter before merging it into tmp2.
+                                .{ ._, ._, .xor, .tmp8d, .tmp8d, ._, ._ },
+                                .{ ._, ._, .@"test", .tmp6d, .tmp6d, ._, ._ },
+                                .{ ._, .fromCondition(cc), .set, .tmp8b, ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp7d, .tmp1d, ._, ._ },
+                                .{ ._, ._l, .sh, .tmp8q, .tmp7b, ._, ._ },
+                                .{ ._, ._, .@"or", .tmp2q, .tmp8q, ._, ._ },
+                                .{ ._, ._c, .in, .tmp1d, ._, ._, ._ },
+                                // Once the counter reaches a multiple of 64, store the completed qword
+                                // (byte offset = counter / 8, minus the 8 bytes just completed) and reset.
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ },
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp6d, .ui(3), ._, ._ },
+                                .{ ._, ._, .mov, .memid(.dst0q, .tmp6, -8), .tmp2q, ._, ._ },
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+                                .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                                // Flush a partially filled final qword, if there is one. The index register
+                                // is scaled by 8 in the address below, so it must hold the qword index,
+                                // i.e. the bit counter shifted right by 6.
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ },
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp6d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp6), .tmp2q, ._, ._ },
+                            } },
+                        }, .{
+                            // Element-wise `cc` comparison of two in-memory vectors of xword-sized floats
+                            // (SSE2 variant for `slow_incdec`; the element counter is advanced with `lea`).
+                            // Each pair of elements is passed in xmm0/xmm1 to `__eqtf2`/`__netf2`, and one
+                            // result bit per element is packed into the in-memory destination, a qword
+                            // at a time.
+                            .required_features = .{ .@"64bit", .sse2, .slow_incdec, null },
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                // tmp0: byte offset into both sources; tmp1: element (= result bit) counter;
+                                // tmp2: result bits gathered for the qword currently being filled.
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                                // tmp3/tmp4: the two elements handed to the call; tmp5: the callee symbol.
+                                .{ .type = .f128, .kind = .{ .reg = .xmm0 } },
+                                .{ .type = .f128, .kind = .{ .reg = .xmm1 } },
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
+                                    else => unreachable,
+                                    .e => "__eqtf2",
+                                    .ne => "__netf2",
+                                } } } },
+                                // tmp6: tested right after the call, presumably its return value (later
+                                // reused as index scratch); tmp7: shift count; tmp8: the new result bit.
+                                .{ .type = .i32, .kind = .{ .reg = .eax } },
+                                .{ .type = .u8, .kind = .{ .reg = .cl } },
+                                .{ .type = .u64, .kind = .{ .reg = .rdx } },
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                // NOTE(review): presumably `.sub_size` starts tmp0 at minus the byte size of
+                                // src0, so that the `add ..., 16` below carries after the last element.
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+                                // Load the next pair of elements and call the comparison routine.
+                                .{ .@"0:", ._dqa, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                                .{ ._, ._dqa, .mov, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                                .{ ._, ._, .call, .tmp5d, ._, ._, ._ },
+                                // Reduce the tested result to a single bit and shift it left by the
+                                // element counter before merging it into tmp2.
+                                .{ ._, ._, .xor, .tmp8d, .tmp8d, ._, ._ },
+                                .{ ._, ._, .@"test", .tmp6d, .tmp6d, ._, ._ },
+                                .{ ._, .fromCondition(cc), .set, .tmp8b, ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp7d, .tmp1d, ._, ._ },
+                                .{ ._, ._l, .sh, .tmp8q, .tmp7b, ._, ._ },
+                                .{ ._, ._, .@"or", .tmp2q, .tmp8q, ._, ._ },
+                                .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
+                                // Once the counter reaches a multiple of 64, store the completed qword
+                                // (byte offset = counter / 8, minus the 8 bytes just completed) and reset.
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ },
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp6d, .ui(3), ._, ._ },
+                                .{ ._, ._, .mov, .memid(.dst0q, .tmp6, -8), .tmp2q, ._, ._ },
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+                                .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                                // Flush a partially filled final qword, if there is one. The index register
+                                // is scaled by 8 in the address below, so it must hold the qword index,
+                                // i.e. the bit counter shifted right by 6.
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ },
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp6d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp6), .tmp2q, ._, ._ },
+                            } },
+                        }, .{
+                            // Element-wise `cc` comparison of two in-memory vectors of xword-sized floats
+                            // (SSE2 variant; the element counter is advanced with `inc`). Each pair of
+                            // elements is passed in xmm0/xmm1 to `__eqtf2`/`__netf2`, and one result bit
+                            // per element is packed into the in-memory destination, a qword at a time.
+                            .required_features = .{ .@"64bit", .sse2, null, null },
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                // tmp0: byte offset into both sources; tmp1: element (= result bit) counter;
+                                // tmp2: result bits gathered for the qword currently being filled.
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                                // tmp3/tmp4: the two elements handed to the call; tmp5: the callee symbol.
+                                .{ .type = .f128, .kind = .{ .reg = .xmm0 } },
+                                .{ .type = .f128, .kind = .{ .reg = .xmm1 } },
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
+                                    else => unreachable,
+                                    .e => "__eqtf2",
+                                    .ne => "__netf2",
+                                } } } },
+                                // tmp6: tested right after the call, presumably its return value (later
+                                // reused as index scratch); tmp7: shift count; tmp8: the new result bit.
+                                .{ .type = .i32, .kind = .{ .reg = .eax } },
+                                .{ .type = .u8, .kind = .{ .reg = .cl } },
+                                .{ .type = .u64, .kind = .{ .reg = .rdx } },
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                // NOTE(review): presumably `.sub_size` starts tmp0 at minus the byte size of
+                                // src0, so that the `add ..., 16` below carries after the last element.
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+                                // Load the next pair of elements and call the comparison routine.
+                                .{ .@"0:", ._dqa, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                                .{ ._, ._dqa, .mov, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                                .{ ._, ._, .call, .tmp5d, ._, ._, ._ },
+                                // Reduce the tested result to a single bit and shift it left by the
+                                // element counter before merging it into tmp2.
+                                .{ ._, ._, .xor, .tmp8d, .tmp8d, ._, ._ },
+                                .{ ._, ._, .@"test", .tmp6d, .tmp6d, ._, ._ },
+                                .{ ._, .fromCondition(cc), .set, .tmp8b, ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp7d, .tmp1d, ._, ._ },
+                                .{ ._, ._l, .sh, .tmp8q, .tmp7b, ._, ._ },
+                                .{ ._, ._, .@"or", .tmp2q, .tmp8q, ._, ._ },
+                                .{ ._, ._c, .in, .tmp1d, ._, ._, ._ },
+                                // Once the counter reaches a multiple of 64, store the completed qword
+                                // (byte offset = counter / 8, minus the 8 bytes just completed) and reset.
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ },
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp6d, .ui(3), ._, ._ },
+                                .{ ._, ._, .mov, .memid(.dst0q, .tmp6, -8), .tmp2q, ._, ._ },
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+                                .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                                // Flush a partially filled final qword, if there is one. The index register
+                                // is scaled by 8 in the address below, so it must hold the qword index,
+                                // i.e. the bit counter shifted right by 6.
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ },
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp6d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp6), .tmp2q, ._, ._ },
+                            } },
+                        }, .{
+                            // Element-wise `cc` comparison of two in-memory vectors of xword-sized floats
+                            // (SSE variant for `slow_incdec`; the element counter is advanced with `lea`).
+                            // Each pair of elements is passed in xmm0/xmm1 to `__eqtf2`/`__netf2`, and one
+                            // result bit per element is packed into the in-memory destination, a qword
+                            // at a time.
+                            .required_features = .{ .@"64bit", .sse, .slow_incdec, null },
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                // tmp0: byte offset into both sources; tmp1: element (= result bit) counter;
+                                // tmp2: result bits gathered for the qword currently being filled.
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                                // tmp3/tmp4: the two elements handed to the call; tmp5: the callee symbol.
+                                .{ .type = .f128, .kind = .{ .reg = .xmm0 } },
+                                .{ .type = .f128, .kind = .{ .reg = .xmm1 } },
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
+                                    else => unreachable,
+                                    .e => "__eqtf2",
+                                    .ne => "__netf2",
+                                } } } },
+                                // tmp6: tested right after the call, presumably its return value (later
+                                // reused as index scratch); tmp7: shift count; tmp8: the new result bit.
+                                .{ .type = .i32, .kind = .{ .reg = .eax } },
+                                .{ .type = .u8, .kind = .{ .reg = .cl } },
+                                .{ .type = .u64, .kind = .{ .reg = .rdx } },
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                // NOTE(review): presumably `.sub_size` starts tmp0 at minus the byte size of
+                                // src0, so that the `add ..., 16` below carries after the last element.
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+                                // Load the next pair of elements and call the comparison routine.
+                                .{ .@"0:", ._ps, .mova, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                                .{ ._, ._ps, .mova, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                                .{ ._, ._, .call, .tmp5d, ._, ._, ._ },
+                                // Reduce the tested result to a single bit and shift it left by the
+                                // element counter before merging it into tmp2.
+                                .{ ._, ._, .xor, .tmp8d, .tmp8d, ._, ._ },
+                                .{ ._, ._, .@"test", .tmp6d, .tmp6d, ._, ._ },
+                                .{ ._, .fromCondition(cc), .set, .tmp8b, ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp7d, .tmp1d, ._, ._ },
+                                .{ ._, ._l, .sh, .tmp8q, .tmp7b, ._, ._ },
+                                .{ ._, ._, .@"or", .tmp2q, .tmp8q, ._, ._ },
+                                .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ },
+                                // Once the counter reaches a multiple of 64, store the completed qword
+                                // (byte offset = counter / 8, minus the 8 bytes just completed) and reset.
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ },
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp6d, .ui(3), ._, ._ },
+                                .{ ._, ._, .mov, .memid(.dst0q, .tmp6, -8), .tmp2q, ._, ._ },
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+                                .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                                // Flush a partially filled final qword, if there is one. The index register
+                                // is scaled by 8 in the address below, so it must hold the qword index,
+                                // i.e. the bit counter shifted right by 6.
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ },
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp6d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp6), .tmp2q, ._, ._ },
+                            } },
+                        }, .{
+                            // Element-wise `cc` comparison of two in-memory vectors of xword-sized floats
+                            // (SSE variant; the element counter is advanced with `inc`). Each pair of
+                            // elements is passed in xmm0/xmm1 to `__eqtf2`/`__netf2`, and one result bit
+                            // per element is packed into the in-memory destination, a qword at a time.
+                            .required_features = .{ .@"64bit", .sse, null, null },
+                            .src_constraints = .{
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                                .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } },
+                            },
+                            .patterns = &.{
+                                .{ .src = .{ .to_mem, .to_mem } },
+                            },
+                            .call_frame = .{ .alignment = .@"16" },
+                            .extra_temps = .{
+                                // tmp0: byte offset into both sources; tmp1: element (= result bit) counter;
+                                // tmp2: result bits gathered for the qword currently being filled.
+                                .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                                .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                                // tmp3/tmp4: the two elements handed to the call; tmp5: the callee symbol.
+                                .{ .type = .f128, .kind = .{ .reg = .xmm0 } },
+                                .{ .type = .f128, .kind = .{ .reg = .xmm1 } },
+                                .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
+                                    else => unreachable,
+                                    .e => "__eqtf2",
+                                    .ne => "__netf2",
+                                } } } },
+                                // tmp6: tested right after the call, presumably its return value (later
+                                // reused as index scratch); tmp7: shift count; tmp8: the new result bit.
+                                .{ .type = .i32, .kind = .{ .reg = .eax } },
+                                .{ .type = .u8, .kind = .{ .reg = .cl } },
+                                .{ .type = .u64, .kind = .{ .reg = .rdx } },
+                            },
+                            .dst_temps = .{.mem},
+                            .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
+                            .each = .{ .once = &.{
+                                // NOTE(review): presumably `.sub_size` starts tmp0 at minus the byte size of
+                                // src0, so that the `add ..., 16` below carries after the last element.
+                                .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                                .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+                                // Load the next pair of elements and call the comparison routine.
+                                .{ .@"0:", ._ps, .mova, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                                .{ ._, ._ps, .mova, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                                .{ ._, ._, .call, .tmp5d, ._, ._, ._ },
+                                // Reduce the tested result to a single bit and shift it left by the
+                                // element counter before merging it into tmp2.
+                                .{ ._, ._, .xor, .tmp8d, .tmp8d, ._, ._ },
+                                .{ ._, ._, .@"test", .tmp6d, .tmp6d, ._, ._ },
+                                .{ ._, .fromCondition(cc), .set, .tmp8b, ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp7d, .tmp1d, ._, ._ },
+                                .{ ._, ._l, .sh, .tmp8q, .tmp7b, ._, ._ },
+                                .{ ._, ._, .@"or", .tmp2q, .tmp8q, ._, ._ },
+                                .{ ._, ._c, .in, .tmp1d, ._, ._, ._ },
+                                // Once the counter reaches a multiple of 64, store the completed qword
+                                // (byte offset = counter / 8, minus the 8 bytes just completed) and reset.
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ },
+                                .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp6d, .ui(3), ._, ._ },
+                                .{ ._, ._, .mov, .memid(.dst0q, .tmp6, -8), .tmp2q, ._, ._ },
+                                .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
+                                .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ },
+                                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                                // Flush a partially filled final qword, if there is one. The index register
+                                // is scaled by 8 in the address below, so it must hold the qword index,
+                                // i.e. the bit counter shifted right by 6.
+                                .{ ._, ._, .@"test", .tmp1d, .si(0b111111), ._, ._ },
+                                .{ ._, ._z, .j, .@"0f", ._, ._, ._ },
+                                .{ ._, ._, .mov, .tmp6d, .tmp1d, ._, ._ },
+                                .{ ._, ._r, .sh, .tmp6d, .ui(6), ._, ._ },
+                                .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp6), .tmp2q, ._, ._ },
+                            } },
+                        } },
+                    }) catch |err| switch (err) {
+                        error.SelectFailed => return cg.fail("failed to select {s} {s} {} {} {}", .{
+                            @tagName(air_tag),
+                            @tagName(extra.compareOperator()),
+                            cg.typeOf(extra.lhs).fmt(pt),
+                            ops[0].tracking(cg),
+                            ops[1].tracking(cg),
+                        }),
+                        else => |e| return e,
+                    },
+                    .gte => unreachable,
+                    .gt => unreachable,
+                }
+                try res[0].finish(inst, &.{ extra.lhs, extra.rhs }, &ops, cg);
+            },
+
+            .abs => |air_tag| if (use_old) try cg.airAbs(inst) else {
+                const ty_op = air_datas[@intFromEnum(inst)].ty_op;
+                var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+                var res: [1]Temp = undefined;
+                cg.select(&res, &.{ty_op.ty.toType()}, &ops, comptime &.{ .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .int = .byte }, .any },
                     .patterns = &.{
                         .{ .src = .{ .to_gpr, .none } },
                     },
@@ -9037,6 +11796,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .i64, .kind = .{ .rc = .general_purpose } },
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9061,7 +11823,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .src = .{ .mem, .none } },
                         .{ .src = .{ .to_mm, .none } },
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .mmx } }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .mmx } }},
                     .each = .{ .once = &.{
                         .{ ._, .p_b, .abs, .dst0q, .src0q, ._, ._ },
                     } },
@@ -9072,7 +11834,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .src = .{ .mem, .none } },
                         .{ .src = .{ .to_mm, .none } },
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .mmx } }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .mmx } }},
                     .each = .{ .once = &.{
                         .{ ._, .p_w, .abs, .dst0q, .src0q, ._, ._ },
                     } },
@@ -9083,7 +11845,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .src = .{ .mem, .none } },
                         .{ .src = .{ .to_mm, .none } },
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .mmx } }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .mmx } }},
                     .each = .{ .once = &.{
                         .{ ._, .p_d, .abs, .dst0q, .src0q, ._, ._ },
                     } },
@@ -9094,7 +11856,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .src = .{ .mem, .none } },
                         .{ .src = .{ .to_sse, .none } },
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
                     .each = .{ .once = &.{
                         .{ ._, .p_b, .abs, .dst0x, .src0x, ._, ._ },
                     } },
@@ -9105,7 +11867,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .src = .{ .mem, .none } },
                         .{ .src = .{ .to_sse, .none } },
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
                     .each = .{ .once = &.{
                         .{ ._, .p_w, .abs, .dst0x, .src0x, ._, ._ },
                     } },
@@ -9116,7 +11878,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .src = .{ .mem, .none } },
                         .{ .src = .{ .to_sse, .none } },
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
                     .each = .{ .once = &.{
                         .{ ._, .p_d, .abs, .dst0x, .src0x, ._, ._ },
                     } },
@@ -9127,7 +11889,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .src = .{ .mem, .none } },
                         .{ .src = .{ .to_sse, .none } },
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
                     .each = .{ .once = &.{
                         .{ ._, .vp_b, .abs, .dst0x, .src0x, ._, ._ },
                     } },
@@ -9138,7 +11900,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .src = .{ .mem, .none } },
                         .{ .src = .{ .to_sse, .none } },
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
                     .each = .{ .once = &.{
                         .{ ._, .vp_w, .abs, .dst0x, .src0x, ._, ._ },
                     } },
@@ -9149,7 +11911,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .src = .{ .mem, .none } },
                         .{ .src = .{ .to_sse, .none } },
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
                     .each = .{ .once = &.{
                         .{ ._, .vp_d, .abs, .dst0x, .src0x, ._, ._ },
                     } },
@@ -9160,7 +11922,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .src = .{ .mem, .none } },
                         .{ .src = .{ .to_sse, .none } },
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
                     .each = .{ .once = &.{
                         .{ ._, .vp_b, .abs, .dst0y, .src0y, ._, ._ },
                     } },
@@ -9171,7 +11933,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .src = .{ .mem, .none } },
                         .{ .src = .{ .to_sse, .none } },
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
                     .each = .{ .once = &.{
                         .{ ._, .vp_w, .abs, .dst0y, .src0y, ._, ._ },
                     } },
@@ -9182,7 +11944,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .src = .{ .mem, .none } },
                         .{ .src = .{ .to_sse, .none } },
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
                     .each = .{ .once = &.{
                         .{ ._, .vp_d, .abs, .dst0y, .src0y, ._, ._ },
                     } },
@@ -9199,6 +11961,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9222,6 +11987,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9245,6 +12013,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9268,6 +12039,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9291,6 +12065,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9314,6 +12091,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9337,6 +12117,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9360,6 +12143,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9383,6 +12169,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9406,6 +12195,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9429,6 +12221,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9452,6 +12247,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9475,6 +12273,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9501,6 +12302,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9527,6 +12331,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9552,6 +12359,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9578,6 +12388,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9602,6 +12415,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9628,6 +12444,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9652,6 +12471,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9678,6 +12500,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9703,6 +12528,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9729,6 +12557,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .i64, .kind = .{ .rc = .general_purpose } },
                         .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
@@ -9750,15 +12581,40 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .cmp, .tmp0d, .sa(.none, .add_src0_unaligned_size), ._, ._ },
                         .{ ._, ._b, .j, .@"0b", ._, ._, ._ },
                     } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .dword } }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .xword } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .v_ps, .@"and", .dst0x, .src0x, .lea(.xword, .tmp0), ._ },
+                    } },
                 }, .{
                     .required_features = .{ .sse, null, null, null },
-                    .src_constraints = .{ .{ .scalar_exact_float = .{ .of = .xword, .is = .dword } }, .any },
+                    .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .dword } }, .any },
                     .patterns = &.{
                         .{ .src = .{ .to_mut_sse, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
-                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize = true } } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .xword } } },
+                        .unused,
+                        .unused,
+                        .unused,
                         .unused,
                         .unused,
                         .unused,
@@ -9770,33 +12626,61 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._ps, .@"and", .dst0x, .lea(.xword, .tmp0), ._, ._ },
                     } },
                 }, .{
-                    .required_features = .{ .sse2, null, null, null },
-                    .src_constraints = .{ .{ .scalar_exact_float = .{ .of = .xword, .is = .qword } }, .any },
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .scalar_float = .{ .of = .yword, .is = .dword } }, .any },
                     .patterns = &.{
-                        .{ .src = .{ .to_mut_sse, .none } },
+                        .{ .src = .{ .to_sse, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
-                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize = true } } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .yword } } },
+                        .unused,
+                        .unused,
+                        .unused,
                         .unused,
                         .unused,
                         .unused,
                         .unused,
                     },
-                    .dst_temps = .{.{ .ref = .src0 }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
                     .each = .{ .once = &.{
                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
-                        .{ ._, ._pd, .@"and", .dst0x, .lea(.xword, .tmp0), ._, ._ },
+                        .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.yword, .tmp0), ._ },
                     } },
                 }, .{
-                    .required_features = .{ .sse, null, null, null },
-                    .src_constraints = .{ .{ .scalar_exact_float = .{ .of = .xword, .is = .qword } }, .any },
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .qword } }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .xword } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .v_pd, .@"and", .dst0x, .src0x, .lea(.xword, .tmp0), ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .qword } }, .any },
                     .patterns = &.{
                         .{ .src = .{ .to_mut_sse, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
-                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize = true } } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .xword } } },
+                        .unused,
+                        .unused,
+                        .unused,
                         .unused,
                         .unused,
                         .unused,
@@ -9805,218 +12689,383 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{.{ .ref = .src0 }},
                     .each = .{ .once = &.{
                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
-                        .{ ._, ._ps, .@"and", .dst0x, .lea(.xword, .tmp0), ._, ._ },
+                        .{ ._, ._pd, .@"and", .dst0x, .lea(.xword, .tmp0), ._, ._ },
                     } },
                 }, .{
                     .required_features = .{ .avx, null, null, null },
-                    .src_constraints = .{ .{ .scalar_exact_float = .{ .of = .xword, .is = .dword } }, .any },
+                    .src_constraints = .{ .{ .scalar_float = .{ .of = .yword, .is = .qword } }, .any },
                     .patterns = &.{
                         .{ .src = .{ .to_sse, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
-                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize = true } } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .yword } } },
+                        .unused,
+                        .unused,
+                        .unused,
                         .unused,
                         .unused,
                         .unused,
                         .unused,
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
                     .each = .{ .once = &.{
                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
-                        .{ ._, .v_ps, .@"and", .dst0x, .src0x, .lea(.xword, .tmp0), ._ },
+                        .{ ._, .v_pd, .@"and", .dst0y, .src0y, .lea(.yword, .tmp0), ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .x87, null, null, null },
+                    .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_x87, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .x87 } }},
+                    .each = .{ .once = &.{
+                        .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
+                        .{ ._, .f_, .abs, ._, ._, ._, ._ },
+                        .{ ._, .f_p, .st, .dst0t, ._, ._, ._ },
                     } },
                 }, .{
                     .required_features = .{ .avx, null, null, null },
-                    .src_constraints = .{ .{ .scalar_exact_float = .{ .of = .xword, .is = .qword } }, .any },
+                    .src_constraints = .{ .{ .scalar_any_float = .xword }, .any },
                     .patterns = &.{
                         .{ .src = .{ .to_sse, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
-                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize = true } } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .xword } } },
+                        .unused,
+                        .unused,
+                        .unused,
                         .unused,
                         .unused,
                         .unused,
                         .unused,
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
                     .each = .{ .once = &.{
                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
-                        .{ ._, .v_pd, .@"and", .dst0x, .src0x, .lea(.xword, .tmp0), ._ },
+                        .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.xword, .tmp0), ._ },
                     } },
                 }, .{
-                    .required_features = .{ .avx, null, null, null },
-                    .src_constraints = .{ .{ .scalar_exact_float = .{ .of = .yword, .is = .dword } }, .any },
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .scalar_any_float = .xword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .xword } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .p_, .@"and", .dst0x, .lea(.xword, .tmp0), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse, null, null, null },
+                    .src_constraints = .{ .{ .scalar_any_float = .xword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .xword } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, ._ps, .@"and", .dst0x, .lea(.xword, .tmp0), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{ .{ .scalar_any_float = .yword }, .any },
                     .patterns = &.{
                         .{ .src = .{ .to_sse, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
-                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize = true } } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .yword } } },
+                        .unused,
+                        .unused,
+                        .unused,
                         .unused,
                         .unused,
                         .unused,
                         .unused,
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
                     .each = .{ .once = &.{
                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
-                        .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.yword, .tmp0), ._ },
+                        .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.yword, .tmp0), ._ },
                     } },
                 }, .{
                     .required_features = .{ .avx, null, null, null },
-                    .src_constraints = .{ .{ .scalar_exact_float = .{ .of = .yword, .is = .qword } }, .any },
+                    .src_constraints = .{ .{ .scalar_any_float = .yword }, .any },
                     .patterns = &.{
                         .{ .src = .{ .to_sse, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
-                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize = true } } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .yword } } },
+                        .unused,
+                        .unused,
+                        .unused,
                         .unused,
                         .unused,
                         .unused,
                         .unused,
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
                     .each = .{ .once = &.{
                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
                         .{ ._, .v_pd, .@"and", .dst0y, .src0y, .lea(.yword, .tmp0), ._ },
                     } },
                 }, .{
-                    .required_features = .{ .x87, null, null, null },
-                    .src_constraints = .{ .{ .scalar_exact_float = .{ .of = .xword, .is = .tbyte } }, .any },
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } }, .any },
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
-                        .{ .src = .{ .to_x87, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
-                        .{ .type = .f80, .kind = .{ .reg = .st7 } },
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .yword } } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .rc = .sse } },
                         .unused,
                         .unused,
                         .unused,
                         .unused,
                         .unused,
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .x87 } }},
+                    .dst_temps = .{.mem},
                     .each = .{ .once = &.{
-                        .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
-                        .{ ._, .f_, .abs, ._, ._, ._, ._ },
-                        .{ ._, .f_p, .st, .dst0t, ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .v_ps, .mova, .tmp2y, .lea(.yword, .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_ps, .@"and", .tmp3y, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._ },
+                        .{ ._, .v_ps, .mova, .memia(.dst0y, .tmp0, .add_size), .tmp3y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .xword } } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, ._ps, .mova, .tmp2x, .lea(.xword, .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._ps, .mova, .tmp3x, .tmp2x, ._, ._ },
+                        .{ ._, ._ps, .@"and", .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_size), .tmp3x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .required_features = .{ .avx2, null, null, null },
-                    .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any },
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .yword, .is = .qword } }, .any },
                     .patterns = &.{
-                        .{ .src = .{ .to_sse, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
-                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
-                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize = true } } },
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .yword } } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
                         .unused,
                         .unused,
                         .unused,
                         .unused,
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.mem},
                     .each = .{ .once = &.{
                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
-                        .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.xword, .tmp0), ._ },
+                        .{ ._, .v_pd, .mova, .tmp2y, .lea(.yword, .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_pd, .@"and", .tmp3y, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._ },
+                        .{ ._, .v_pd, .mova, .memia(.dst0y, .tmp0, .add_size), .tmp3y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .required_features = .{ .avx, null, null, null },
-                    .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any },
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } }, .any },
                     .patterns = &.{
-                        .{ .src = .{ .to_sse, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
-                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
-                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize = true } } },
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .xword } } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
                         .unused,
                         .unused,
                         .unused,
                         .unused,
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.mem},
                     .each = .{ .once = &.{
                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
-                        .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.xword, .tmp0), ._ },
+                        .{ ._, ._pd, .mova, .tmp2x, .lea(.xword, .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._pd, .mova, .tmp3x, .tmp2x, ._, ._ },
+                        .{ ._, ._pd, .@"and", .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._pd, .mova, .memia(.dst0x, .tmp0, .add_size), .tmp3x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .required_features = .{ .sse2, null, null, null },
-                    .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any },
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_any_float = .yword }, .any },
                     .patterns = &.{
-                        .{ .src = .{ .to_mut_sse, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
-                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
-                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize = true } } },
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .yword } } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
                         .unused,
                         .unused,
                         .unused,
                         .unused,
                     },
-                    .dst_temps = .{.{ .ref = .src0 }},
+                    .dst_temps = .{.mem},
                     .each = .{ .once = &.{
                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
-                        .{ ._, .p_, .@"and", .dst0x, .lea(.xword, .tmp0), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp2y, .lea(.yword, .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .vp_, .@"and", .tmp3y, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp3y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .required_features = .{ .sse, null, null, null },
-                    .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any },
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_any_float = .yword }, .any },
                     .patterns = &.{
-                        .{ .src = .{ .to_mut_sse, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
-                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
-                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize = true } } },
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .yword } } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
                         .unused,
                         .unused,
                         .unused,
                         .unused,
                     },
-                    .dst_temps = .{.{ .ref = .src0 }},
+                    .dst_temps = .{.mem},
                     .each = .{ .once = &.{
                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
-                        .{ ._, ._ps, .@"and", .dst0x, .lea(.xword, .tmp0), ._, ._ },
+                        .{ ._, .v_pd, .mova, .tmp2y, .lea(.yword, .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_pd, .@"and", .tmp3y, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._ },
+                        .{ ._, .v_pd, .mova, .memia(.dst0y, .tmp0, .add_size), .tmp3y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .required_features = .{ .avx2, null, null, null },
-                    .src_constraints = .{ .{ .scalar_float = .{ .of = .yword, .is = .xword } }, .any },
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_any_float = .xword }, .any },
                     .patterns = &.{
-                        .{ .src = .{ .to_sse, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
-                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
-                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize = true } } },
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .xword } } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
                         .unused,
                         .unused,
                         .unused,
                         .unused,
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.mem},
                     .each = .{ .once = &.{
                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
-                        .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.yword, .tmp0), ._ },
+                        .{ ._, ._dqa, .mov, .tmp2x, .lea(.xword, .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp3x, .tmp2x, ._, ._ },
+                        .{ ._, .p_, .@"and", .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp3x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .required_features = .{ .avx, null, null, null },
-                    .src_constraints = .{ .{ .scalar_float = .{ .of = .yword, .is = .xword } }, .any },
+                    .required_features = .{ .sse, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_any_float = .xword }, .any },
                     .patterns = &.{
-                        .{ .src = .{ .to_sse, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
-                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
-                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize = true } } },
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .xword } } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .rc = .sse } },
+                        .unused,
                         .unused,
                         .unused,
                         .unused,
                         .unused,
                     },
-                    .dst_temps = .{.{ .mut_reg = .{ .ref = .src0, .rc = .sse } }},
+                    .dst_temps = .{.mem},
                     .each = .{ .once = &.{
                         .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
-                        .{ ._, .v_pd, .@"and", .dst0y, .src0y, .lea(.yword, .tmp0), ._ },
+                        .{ ._, ._ps, .mova, .tmp2x, .lea(.xword, .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._ps, .mova, .tmp3x, .tmp2x, ._, ._ },
+                        .{ ._, ._ps, .@"and", .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_size), .tmp3x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 } }) catch |err| switch (err) {
                     error.SelectFailed => return cg.fail("failed to select {s} {} {}", .{
@@ -10194,6 +13243,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .rc = .general_purpose }},
                         .clobbers = .{ .eflags = true },
@@ -10328,6 +13380,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true },
@@ -10353,6 +13408,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true },
@@ -10389,6 +13447,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true },
@@ -10414,6 +13475,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true },
@@ -10436,6 +13500,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true },
@@ -10459,6 +13526,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true },
@@ -10491,6 +13561,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true },
@@ -10520,6 +13593,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true },
@@ -10552,6 +13628,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true },
@@ -10581,6 +13660,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true },
@@ -10610,6 +13692,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true },
@@ -10639,6 +13724,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true },
@@ -10671,6 +13759,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true },
@@ -10702,6 +13793,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true },
@@ -10728,6 +13822,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = switch (cc) {
                             else => unreachable,
@@ -10747,7 +13844,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .patterns = &.{
                             .{ .src = .{ .{ .to_reg = .xmm0 }, .{ .to_reg = .xmm1 } } },
                         },
-                        .call_frame = .{ .size = 0, .alignment = .@"16" },
+                        .call_frame = .{ .alignment = .@"16" },
                         .extra_temps = .{
                             .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
                                 else => unreachable,
@@ -10759,11 +13856,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
                         .each = .{ .once = &.{
-                            .{ ._, ._, .call, .tmp0p, ._, ._, ._ },
+                            .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
                             .{ ._, ._, .@"test", .tmp1d, .tmp1d, ._, ._ },
                         } },
                     }, .{
@@ -10781,6 +13881,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = switch (cc) {
                             else => unreachable,
@@ -10806,6 +13909,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = switch (cc) {
                             else => unreachable,
@@ -10822,7 +13928,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .patterns = &.{
                             .{ .src = .{ .{ .to_reg = .xmm0 }, .{ .to_reg = .xmm1 } } },
                         },
-                        .call_frame = .{ .size = 0, .alignment = .@"16" },
+                        .call_frame = .{ .alignment = .@"16" },
                         .extra_temps = .{
                             .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
                                 else => unreachable,
@@ -10834,11 +13940,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
                         .each = .{ .once = &.{
-                            .{ ._, ._, .call, .tmp0p, ._, ._, ._ },
+                            .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
                             .{ ._, ._, .@"test", .tmp1d, .tmp1d, ._, ._ },
                         } },
                     }, .{
@@ -10856,6 +13965,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = switch (cc) {
                             else => unreachable,
@@ -10881,6 +13993,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = switch (cc) {
                             else => unreachable,
@@ -10897,7 +14012,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .patterns = &.{
                             .{ .src = .{ .{ .to_reg = .xmm0 }, .{ .to_reg = .xmm1 } } },
                         },
-                        .call_frame = .{ .size = 0, .alignment = .@"16" },
+                        .call_frame = .{ .alignment = .@"16" },
                         .extra_temps = .{
                             .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
                                 else => unreachable,
@@ -10909,11 +14024,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
                         .each = .{ .once = &.{
-                            .{ ._, ._, .call, .tmp0p, ._, ._, ._ },
+                            .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
                             .{ ._, ._, .@"test", .tmp1d, .tmp1d, ._, ._ },
                         } },
                     }, .{
@@ -10929,6 +14047,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = switch (cc) {
                             else => unreachable,
@@ -10957,6 +14078,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = switch (cc) {
                             else => unreachable,
@@ -10977,7 +14101,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .extra_temps = .{
                             .{ .type = .f80, .kind = .{ .reg = .st6 } },
                             .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                            .{ .type = .u16, .kind = .{ .reg = .ax } },
+                            .{ .type = .u8, .kind = .{ .reg = .ah } },
+                            .unused,
+                            .unused,
+                            .unused,
                             .unused,
                             .unused,
                             .unused,
@@ -11004,10 +14131,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .extra_temps = .{
                             .{ .type = .f80, .kind = .{ .reg = .st6 } },
                             .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                            .{ .type = .u16, .kind = .{ .reg = .ax } },
                             .{ .type = .u8, .kind = .{ .reg = .ah } },
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true },
@@ -11016,8 +14146,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
                             .{ ._, .f_pp, .ucom, ._, ._, ._, ._ },
                             .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
-                            .{ ._, ._, .xor, .tmp3b, .si(0b0_1_000_000), ._, ._ },
-                            .{ ._, ._, .@"test", .tmp3b, .si(0b0_1_000_100), ._, ._ },
+                            .{ ._, ._, .xor, .tmp2b, .si(0b0_1_000_000), ._, ._ },
+                            .{ ._, ._, .@"test", .tmp2b, .si(0b0_1_000_100), ._, ._ },
                         } },
                     }, .{
                         .required_features = .{ .x87, null, null, null },
@@ -11028,7 +14158,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .extra_temps = .{
                             .{ .type = .f80, .kind = .{ .reg = .st6 } },
                             .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                            .{ .type = .u16, .kind = .{ .reg = .ax } },
+                            .{ .type = .u8, .kind = .{ .reg = .ah } },
+                            .unused,
+                            .unused,
+                            .unused,
                             .unused,
                             .unused,
                             .unused,
@@ -11057,7 +14190,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .extra_temps = .{
                             .{ .type = .f80, .kind = .{ .reg = .st6 } },
                             .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                            .{ .type = .u16, .kind = .{ .reg = .ax } },
+                            .{ .type = .u16, .kind = .{ .reg = .ah } },
+                            .unused,
+                            .unused,
+                            .unused,
                             .unused,
                             .unused,
                             .unused,
@@ -11085,10 +14221,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .extra_temps = .{
                             .{ .type = .f80, .kind = .{ .reg = .st6 } },
                             .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                            .{ .type = .u16, .kind = .{ .reg = .ax } },
                             .{ .type = .u8, .kind = .{ .reg = .ah } },
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true },
@@ -11096,8 +14235,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .{ ._, .f_, .ld, .src0t, ._, ._, ._ },
                             .{ ._, .f_p, .ucom, .src1t, ._, ._, ._ },
                             .{ ._, .fn_sw, .st, .tmp2w, ._, ._, ._ },
-                            .{ ._, ._, .xor, .tmp3b, .si(0b0_1_000_000), ._, ._ },
-                            .{ ._, ._, .@"test", .tmp3b, .si(0b0_1_000_100), ._, ._ },
+                            .{ ._, ._, .xor, .tmp2b, .si(0b0_1_000_000), ._, ._ },
+                            .{ ._, ._, .@"test", .tmp2b, .si(0b0_1_000_100), ._, ._ },
                         } },
                     }, .{
                         .required_features = .{ .x87, null, null, null },
@@ -11110,7 +14249,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .extra_temps = .{
                             .{ .type = .f80, .kind = .{ .reg = .st6 } },
                             .{ .type = .f80, .kind = .{ .reg = .st7 } },
-                            .{ .type = .u16, .kind = .{ .reg = .ax } },
+                            .{ .type = .u16, .kind = .{ .reg = .ah } },
+                            .unused,
+                            .unused,
+                            .unused,
                             .unused,
                             .unused,
                             .unused,
@@ -11133,7 +14275,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .patterns = &.{
                             .{ .src = .{ .{ .to_reg = .xmm0 }, .{ .to_reg = .xmm1 } } },
                         },
-                        .call_frame = .{ .size = 0, .alignment = .@"16" },
+                        .call_frame = .{ .alignment = .@"16" },
                         .extra_temps = .{
                             .{ .type = .usize, .kind = .{ .symbol = &.{ .name = switch (cc) {
                                 else => unreachable,
@@ -11145,11 +14287,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                             .unused,
                             .unused,
                             .unused,
+                            .unused,
+                            .unused,
+                            .unused,
                         },
                         .dst_temps = .{.{ .cc = cc }},
                         .clobbers = .{ .eflags = true, .caller_preserved = .ccc },
                         .each = .{ .once = &.{
-                            .{ ._, ._, .call, .tmp0p, ._, ._, ._ },
+                            .{ ._, ._, .call, .tmp0d, ._, ._, ._ },
                             .{ ._, ._, .@"test", .tmp1d, .tmp1d, ._, ._ },
                         } },
                     } },
@@ -11753,7 +14898,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
 
             .err_return_trace => {
                 const ert: Temp = .{ .index = err_ret_trace_index };
-                try ert.moveTo(inst, cg);
+                try ert.finish(inst, &.{}, &.{}, cg);
             },
             .set_err_return_trace => {
                 const un_op = air_datas[@intFromEnum(inst)].un_op;
@@ -11788,7 +14933,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 var ert: Temp = .{ .index = err_ret_trace_index };
                 var res = try ert.load(.usize, .{ .disp = @intCast(agg_ty.structFieldOffset(ty_pl.payload, zcu)) }, cg);
                 try ert.die(cg);
-                try res.moveTo(inst, cg);
+                try res.finish(inst, &.{}, &.{}, cg);
             },
 
             .vector_store_elem => return cg.fail("TODO implement vector_store_elem", .{}),
@@ -23113,7 +26258,11 @@ fn lowerBlock(self: *CodeGen, inst: Air.Inst.Index, body: []const Air.Inst.Index
             .resurrect = true,
             .close_scope = true,
         });
-        for (block_data.value.relocs.items) |reloc| self.performReloc(reloc);
+        const block_relocs_last_index = block_data.value.relocs.items.len - 1;
+        for (if (block_data.value.relocs.items[block_relocs_last_index] == self.mir_instructions.len - 1) block_relocs: {
+            _ = self.mir_instructions.pop();
+            break :block_relocs block_data.value.relocs.items[0..block_relocs_last_index];
+        } else block_data.value.relocs.items) |block_reloc| self.performReloc(block_reloc);
     }
 
     if (std.debug.runtime_safety) assert(self.inst_tracking.getIndex(inst).? == inst_tracking_i);
@@ -24623,13 +27772,15 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool)
                     64 => switch (ty.vectorLen(zcu)) {
                         1...2 => return .{ .move = if (self.hasFeature(.avx))
                             .{ .v_pd, if (aligned) .mova else .movu }
+                        else if (self.hasFeature(.sse2))
+                            .{ ._pd, if (aligned) .mova else .movu }
                         else
-                            .{ ._pd, if (aligned) .mova else .movu } },
+                            .{ ._ps, if (aligned) .mova else .movu } },
                         3...4 => if (self.hasFeature(.avx))
                             return .{ .move = .{ .v_pd, if (aligned) .mova else .movu } },
                         else => {},
                     },
-                    128 => switch (ty.vectorLen(zcu)) {
+                    80, 128 => switch (ty.vectorLen(zcu)) {
                         1 => return .{ .move = if (self.hasFeature(.avx))
                             .{ if (aligned) .v_dqa else .v_dqu, .mov }
                         else if (self.hasFeature(.sse2))
@@ -24958,7 +28109,7 @@ fn genSetReg(
             },
             .mmx => unreachable,
             .sse => switch (src_reg.class()) {
-                .general_purpose => try self.asmRegisterRegister(
+                .general_purpose => if (self.hasFeature(.sse2)) try self.asmRegisterRegister(
                     switch (abi_size) {
                         1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
                         5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
@@ -24966,7 +28117,26 @@ fn genSetReg(
                     },
                     dst_reg.to128(),
                     registerAlias(src_reg, @max(abi_size, 4)),
-                ),
+                ) else { // No SSE2: go through a frame slot instead of a direct register-to-register move.
+                    const frame_size = std.math.ceilPowerOfTwoAssert(u32, @max(abi_size, 4)); // abi_size rounded up to a power of two, at least 4
+                    const frame_index = try self.allocFrameIndex(.init(.{
+                        .size = frame_size,
+                        .alignment = .fromNonzeroByteUnits(frame_size), // the slot is aligned to its own size
+                    }));
+                    try self.asmMemoryRegister(.{ ._, .mov }, .{ // store abi_size bytes of src_reg into the slot
+                        .base = .{ .frame = frame_index },
+                        .mod = .{ .rm = .{ .size = .fromSize(abi_size) } },
+                    }, registerAlias(src_reg, abi_size));
+                    try self.asmRegisterMemory(switch (frame_size) { // load frame_size bytes of the slot into dst_reg; NOTE(review): when abi_size < frame_size this also reads bytes the store above did not write - confirm they are ignored
+                        4 => .{ ._ss, .mov },
+                        8 => .{ ._ps, .movl },
+                        16 => .{ ._ps, .mov },
+                        else => unreachable, // only slot sizes 4, 8 and 16 are handled
+                    }, dst_reg.to128(), .{
+                        .base = .{ .frame = frame_index },
+                        .mod = .{ .rm = .{ .size = .fromSize(frame_size) } },
+                    });
+                },
                 .segment => try self.genSetReg(
                     dst_reg,
                     ty,
@@ -24977,7 +28147,12 @@ fn genSetReg(
                 .sse => try self.asmRegisterRegister(
                     @as(?Mir.Inst.FixedTag, switch (ty.scalarType(zcu).zigTypeTag(zcu)) {
                         else => switch (abi_size) {
-                            1...16 => if (self.hasFeature(.avx)) .{ .v_dqa, .mov } else .{ ._dqa, .mov },
+                            1...16 => if (self.hasFeature(.avx))
+                                .{ .v_dqa, .mov }
+                            else if (self.hasFeature(.sse2))
+                                .{ ._dqa, .mov }
+                            else
+                                .{ ._ps, .mova },
                             17...32 => if (self.hasFeature(.avx)) .{ .v_dqa, .mov } else null,
                             else => null,
                         },
@@ -24985,13 +28160,20 @@ fn genSetReg(
                             16, 128 => switch (abi_size) {
                                 2...16 => if (self.hasFeature(.avx))
                                     .{ .v_dqa, .mov }
+                                else if (self.hasFeature(.sse2))
+                                    .{ ._dqa, .mov }
                                 else
-                                    .{ ._dqa, .mov },
+                                    .{ ._ps, .mova },
                                 17...32 => if (self.hasFeature(.avx)) .{ .v_dqa, .mov } else null,
                                 else => null,
                             },
                             32 => if (self.hasFeature(.avx)) .{ .v_ps, .mova } else .{ ._ps, .mova },
-                            64 => if (self.hasFeature(.avx)) .{ .v_pd, .mova } else .{ ._pd, .mova },
+                            64 => if (self.hasFeature(.avx))
+                                .{ .v_pd, .mova }
+                            else if (self.hasFeature(.sse2))
+                                .{ ._pd, .mova }
+                            else
+                                .{ ._ps, .mova },
                             80 => null,
                             else => unreachable,
                         },
@@ -25688,15 +28870,19 @@ fn airBitCast(self: *CodeGen, inst: Air.Inst.Index) !void {
         const src_lock = if (src_mcv.getReg()) |src_reg| self.register_manager.lockReg(src_reg) else null;
         defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
 
-        const dst_mcv = if ((if (src_mcv.getReg()) |src_reg| src_reg.class() == .general_purpose else true) and
+        const dst_mcv = if (src_mcv != .register_mask and
+            (if (src_mcv.getReg()) |src_reg| src_reg.class() == .general_purpose else true) and
             dst_rc.supersetOf(src_rc) and dst_ty.abiSize(zcu) <= src_ty.abiSize(zcu) and
             dst_ty.abiAlignment(zcu).order(src_ty.abiAlignment(zcu)).compare(.lte) and
             self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: {
             const dst_mcv = try self.allocRegOrMem(inst, true);
-            try self.genCopy(switch (std.math.order(dst_ty.abiSize(zcu), src_ty.abiSize(zcu))) {
-                .lt => dst_ty,
-                .eq => if (!dst_mcv.isBase() or src_mcv.isBase()) dst_ty else src_ty,
-                .gt => src_ty,
+            try self.genCopy(switch (src_mcv) {
+                else => switch (std.math.order(dst_ty.abiSize(zcu), src_ty.abiSize(zcu))) {
+                    .lt => dst_ty,
+                    .eq => if (!dst_mcv.isBase() or src_mcv.isBase()) dst_ty else src_ty,
+                    .gt => src_ty,
+                },
+                .register_mask => src_ty,
             }, dst_mcv, src_mcv, .{});
             break :dst dst_mcv;
         };
@@ -31173,7 +34359,7 @@ const Select = struct {
         dst_constraints: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]Constraint = @splat(.any),
         src_constraints: [@intFromEnum(Select.Operand.Ref.none) - @intFromEnum(Select.Operand.Ref.src0)]Constraint = @splat(.any),
         patterns: []const Select.Pattern,
-        call_frame: packed struct(u16) { size: u10, alignment: InternPool.Alignment } = .{ .size = 0, .alignment = .none },
+        call_frame: packed struct(u16) { size: u10 = 0, alignment: InternPool.Alignment } = .{ .size = 0, .alignment = .none },
         extra_temps: [@intFromEnum(Select.Operand.Ref.dst0) - @intFromEnum(Select.Operand.Ref.tmp0)]TempSpec = @splat(.unused),
         dst_temps: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]TempSpec.Kind = @splat(.unused),
         clobbers: packed struct {
@@ -31202,14 +34388,16 @@ const Select = struct {
         multiple_size: Memory.Size,
         int: Memory.Size,
         scalar_int_is: Memory.Size,
-        scalar_int: struct { of: Memory.Size, is: Memory.Size },
+        scalar_int: OfIsSizes,
         scalar_signed_int: Memory.Size,
         scalar_unsigned_int: Memory.Size,
-        scalar_remainder_int: struct { of: Memory.Size, is: Memory.Size },
+        multiple_scalar_int: OfIsSizes,
+        scalar_remainder_int: OfIsSizes,
         float: Memory.Size,
-        scalar_float: struct { of: Memory.Size, is: Memory.Size },
-        scalar_exact_float: struct { of: Memory.Size, is: Memory.Size },
-        multiple_scalar_int: struct { of: Memory.Size, is: Memory.Size },
+        scalar_any_float: Memory.Size,
+        scalar_float: OfIsSizes,
+        multiple_scalar_any_float: Memory.Size,
+        multiple_scalar_float: OfIsSizes,
         exact_int: u16,
         exact_signed_int: u16,
         exact_unsigned_int: u16,
@@ -31218,16 +34406,18 @@ const Select = struct {
         po2_int: Memory.Size,
         signed_po2_int: Memory.Size,
         unsigned_po2_or_exact_int: Memory.Size,
-        remainder_int: struct { of: Memory.Size, is: Memory.Size },
-        exact_remainder_int: struct { of: Memory.Size, is: Memory.Size },
-        signed_or_exact_remainder_int: struct { of: Memory.Size, is: Memory.Size },
-        unsigned_or_exact_remainder_int: struct { of: Memory.Size, is: Memory.Size },
+        remainder_int: OfIsSizes,
+        exact_remainder_int: OfIsSizes,
+        signed_or_exact_remainder_int: OfIsSizes,
+        unsigned_or_exact_remainder_int: OfIsSizes,
         signed_int: Memory.Size,
         unsigned_int: Memory.Size,
         elem_size_is: u8,
         po2_elem_size,
         elem_int: Memory.Size,
 
+        const OfIsSizes = struct { of: Memory.Size, is: Memory.Size };
+
         fn accepts(constraint: Constraint, ty: Type, cg: *CodeGen) bool {
             const zcu = cg.pt.zcu;
             return switch (constraint) {
@@ -31258,7 +34448,7 @@ const Select = struct {
                     size.bitSize(cg.target) >= int_info.bits
                 else
                     false,
-                .scalar_int => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) >= ty.abiSize(zcu) and
+                .scalar_int => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) >= cg.unalignedSize(ty) and
                     if (cg.intInfo(ty.scalarType(zcu))) |int_info| of_is.is.bitSize(cg.target) >= int_info.bits else false,
                 .scalar_signed_int => |size| if (cg.intInfo(ty.scalarType(zcu))) |int_info| switch (int_info.signedness) {
                     .signed => size.bitSize(cg.target) >= int_info.bits,
@@ -31275,9 +34465,13 @@ const Select = struct {
                 else
                     false,
                 .float => |size| if (cg.floatBits(ty)) |float_bits| size.bitSize(cg.target) == float_bits else false,
-                .scalar_float => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) >= ty.abiSize(zcu) and
-                    if (cg.floatBits(ty.scalarType(zcu))) |float_bits| of_is.is.bitSize(cg.target) >= float_bits else false,
-                .scalar_exact_float => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) >= ty.abiSize(zcu) and
+                .scalar_any_float => |size| @divExact(size.bitSize(cg.target), 8) >= ty.abiSize(zcu) and // the type's ABI size fits in `size` bytes...
+                    cg.floatBits(ty.scalarType(zcu)) != null, // ...and floatBits reports a width for its scalar type
+                .scalar_float => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) >= cg.unalignedSize(ty) and // the type's unaligned size fits in `of` bytes...
+                    if (cg.floatBits(ty.scalarType(zcu))) |float_bits| of_is.is.bitSize(cg.target) == float_bits else false, // ...and the scalar float is exactly `is` bits wide
+                .multiple_scalar_any_float => |size| ty.abiSize(zcu) % @divExact(size.bitSize(cg.target), 8) == 0 and // the type's ABI size is a whole multiple of `size` bytes...
+                    cg.floatBits(ty.scalarType(zcu)) != null, // ...and floatBits reports a width for its scalar type
+                .multiple_scalar_float => |of_is| ty.abiSize(zcu) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and // the type's ABI size is a whole multiple of `of` bytes; the exact-width check follows on the next line
                     if (cg.floatBits(ty.scalarType(zcu))) |float_bits| of_is.is.bitSize(cg.target) == float_bits else false,
                 .exact_int => |bit_size| if (cg.intInfo(ty)) |int_info| bit_size == int_info.bits else false,
                 .exact_signed_int => |bit_size| if (cg.intInfo(ty)) |int_info| switch (int_info.signedness) {
@@ -31505,48 +34699,81 @@ const Select = struct {
             unused,
             any,
             cc: Condition,
+            ref: Select.Operand.Ref,
             reg: Register,
             rc: Register.Class,
+            mut_rc: struct { ref: Select.Operand.Ref, rc: Register.Class },
+            ref_mask: struct { ref: Select.Operand.Ref, info: MaskInfo },
             rc_mask: struct { rc: Register.Class, info: MaskInfo },
+            mut_rc_mask: struct { ref: Select.Operand.Ref, rc: Register.Class, info: MaskInfo },
             mem,
             smin_mem: ConstInfo,
             smax_mem: ConstInfo,
             umin_mem: ConstInfo,
             umax_mem: ConstInfo,
-            ref: Select.Operand.Ref,
-            ref_mask: struct { ref: Select.Operand.Ref, info: MaskInfo },
-            mut_reg: struct { ref: Select.Operand.Ref, rc: Register.Class },
             symbol: *const struct { lib: ?[]const u8 = null, name: []const u8 },
 
-            const ConstInfo = struct { ref: Select.Operand.Ref, vectorize: bool = false };
+            const ConstInfo = struct { ref: Select.Operand.Ref, vectorize_to: Memory.Size = .none };
 
             fn finish(kind: Kind, temp: Temp, s: *const Select) void {
                 switch (kind) {
                     else => {},
-                    inline .rc_mask, .ref_mask => |mask| temp.asMask(mask.info, s.cg),
+                    inline .rc_mask, .mut_rc_mask, .ref_mask => |mask| temp.asMask(mask.info, s.cg),
                 }
             }
         };
 
-        fn create(spec: TempSpec, s: *Select) !?Temp {
+        fn pass(spec: TempSpec) u2 { // Returns the creation pass for this spec; select() creates extra temps with `for (1..3) |pass|`.
+            return switch (spec.kind) {
+                .unused => 0, // pass 0 is never iterated by select(), so unused specs are never created
+                .reg => 1, // temps pinned to a specific register are created in the first pass
+                else => 2, // every other kind is created in the second pass
+            };
+        }
+
+        fn create(spec: TempSpec, s: *Select) !Temp {
             const cg = s.cg;
             return switch (spec.kind) {
-                .unused => null,
+                .unused => unreachable,
                 .any => try cg.tempAlloc(spec.type),
                 .cc => |cc| try cg.tempInit(spec.type, .{ .eflags = cc }),
+                .ref => |ref| ref.deref(s),
                 .reg => |reg| try cg.tempInit(spec.type, .{ .register = reg }),
                 .rc => |rc| try cg.tempAllocReg(spec.type, regSetForRegClass(rc)),
+                .mut_rc => |ref_rc| { // reuse the referenced temp when it is mutable and already tracked in a register of class `rc`
+                    const temp = ref_rc.ref.deref(s);
+                    if (temp.isMut(cg)) switch (temp.tracking(cg).short) {
+                        .register => |reg| if (reg.class() == ref_rc.rc) return temp,
+                        .register_offset => |reg_off| if (reg_off.off == 0 and reg_off.reg.class() == ref_rc.rc) return temp, // a register_offset only qualifies with a zero offset
+                        else => {},
+                    };
+                    return try cg.tempAllocReg(spec.type, regSetForRegClass(ref_rc.rc)); // otherwise allocate a new register temp of that class
+                },
+                .ref_mask => |ref_mask| ref_mask.ref.deref(s),
                 .rc_mask => |rc_mask| try cg.tempAllocReg(spec.type, regSetForRegClass(rc_mask.rc)),
+                .mut_rc_mask => |ref_rc_mask| { // same reuse-or-allocate logic as `.mut_rc`; the `info` field is not consulted in this arm
+                    const temp = ref_rc_mask.ref.deref(s);
+                    if (temp.isMut(cg)) switch (temp.tracking(cg).short) {
+                        .register => |reg| if (reg.class() == ref_rc_mask.rc) return temp,
+                        .register_offset => |reg_off| if (reg_off.off == 0 and reg_off.reg.class() == ref_rc_mask.rc) return temp, // a register_offset only qualifies with a zero offset
+                        else => {},
+                    };
+                    return try cg.tempAllocReg(spec.type, regSetForRegClass(ref_rc_mask.rc)); // otherwise allocate a new register temp of that class
+                },
                 .mem => try cg.tempAllocMem(spec.type),
                 .smin_mem, .smax_mem, .umin_mem, .umax_mem => |const_info| {
                     const pt = cg.pt;
                     const zcu = pt.zcu;
                     const ip = &zcu.intern_pool;
-                    const ty = const_info.ref.deref(s).typeOf(s.cg);
-                    const vector_len: ?u32, const scalar_ty: Type = switch (ip.indexToKey(ty.toIntern())) {
-                        else => .{ if (const_info.vectorize) 1 else null, ty },
+                    const ty = const_info.ref.deref(s).typeOf(cg);
+                    const vector_len, const scalar_ty: Type = switch (ip.indexToKey(ty.toIntern())) {
+                        else => .{ null, ty },
                         .vector_type => |vector_type| .{ vector_type.len, .fromInterned(vector_type.child) },
                     };
+                    const res_vector_len: ?u32 = if (const_info.vectorize_to != .none) // an explicit vectorize_to size overrides the operand's own vector length
+                        @intCast(@divExact(@divExact(const_info.vectorize_to.bitSize(cg.target), 8), scalar_ty.abiSize(pt.zcu))) // element count = vectorize_to bytes / scalar ABI size; both divisions must be exact
+                    else
+                        vector_len; // the operand's own length, or null when the operand is not a vector
                     const res_scalar_ty, const res_scalar_val: Value = res_scalar: switch (scalar_ty.toIntern()) {
                         .bool_type => .{
                             scalar_ty,
@@ -31591,7 +34818,7 @@ const Select = struct {
                             break :res_scalar .{ scalar_int_ty, try pt.intValue_big(scalar_int_ty, big_int.toConst()) };
                         },
                     };
-                    const res_val: Value = if (vector_len) |len| .fromInterned(try pt.intern(.{ .aggregate = .{
+                    const res_val: Value = if (res_vector_len) |len| .fromInterned(try pt.intern(.{ .aggregate = .{
                         .ty = (try pt.vectorType(.{
                             .len = len,
                             .child = res_scalar_ty.toIntern(),
@@ -31600,17 +34827,6 @@ const Select = struct {
                     } })) else res_scalar_val;
                     return try cg.tempFromValue(res_val);
                 },
-                .ref => |ref| ref.deref(s),
-                .ref_mask => |ref_mask| ref_mask.ref.deref(s),
-                .mut_reg => |ref_rc| {
-                    const temp = ref_rc.ref.deref(s);
-                    if (temp.isMut(cg)) switch (temp.tracking(cg).short) {
-                        .register => |reg| if (reg.class() == ref_rc.rc) return temp,
-                        .register_offset => |reg_off| if (reg_off.off == 0 and reg_off.reg.class() == ref_rc.rc) return temp,
-                        else => {},
-                    };
-                    return try cg.tempAllocReg(spec.type, regSetForRegClass(ref_rc.rc));
-                },
                 .symbol => |symbol| if (cg.bin_file.cast(.elf)) |elf_file| try cg.tempInit(spec.type, .{ .lea_symbol = .{
                     .sym_index = try elf_file.getGlobalSymbol(symbol.name, symbol.lib),
                 } }) else if (cg.bin_file.cast(.macho)) |macho_file| try cg.tempInit(spec.type, .{ .lea_symbol = .{
@@ -31707,6 +34923,9 @@ const Select = struct {
             tmp3,
             tmp4,
             tmp5,
+            tmp6,
+            tmp7,
+            tmp8,
             dst0,
             src0,
             src1,
@@ -31778,6 +34997,36 @@ const Select = struct {
                 const tmp5x: Sized = .{ .ref = .tmp5, .size = .xword };
                 const tmp5y: Sized = .{ .ref = .tmp5, .size = .yword };
 
+                const tmp6: Sized = .{ .ref = .tmp6, .size = .none };
+                const tmp6b: Sized = .{ .ref = .tmp6, .size = .byte };
+                const tmp6w: Sized = .{ .ref = .tmp6, .size = .word };
+                const tmp6d: Sized = .{ .ref = .tmp6, .size = .dword };
+                const tmp6p: Sized = .{ .ref = .tmp6, .size = .ptr };
+                const tmp6q: Sized = .{ .ref = .tmp6, .size = .qword };
+                const tmp6t: Sized = .{ .ref = .tmp6, .size = .tbyte };
+                const tmp6x: Sized = .{ .ref = .tmp6, .size = .xword };
+                const tmp6y: Sized = .{ .ref = .tmp6, .size = .yword };
+
+                const tmp7: Sized = .{ .ref = .tmp7, .size = .none };
+                const tmp7b: Sized = .{ .ref = .tmp7, .size = .byte };
+                const tmp7w: Sized = .{ .ref = .tmp7, .size = .word };
+                const tmp7d: Sized = .{ .ref = .tmp7, .size = .dword };
+                const tmp7p: Sized = .{ .ref = .tmp7, .size = .ptr };
+                const tmp7q: Sized = .{ .ref = .tmp7, .size = .qword };
+                const tmp7t: Sized = .{ .ref = .tmp7, .size = .tbyte };
+                const tmp7x: Sized = .{ .ref = .tmp7, .size = .xword };
+                const tmp7y: Sized = .{ .ref = .tmp7, .size = .yword };
+
+                const tmp8: Sized = .{ .ref = .tmp8, .size = .none };
+                const tmp8b: Sized = .{ .ref = .tmp8, .size = .byte };
+                const tmp8w: Sized = .{ .ref = .tmp8, .size = .word };
+                const tmp8d: Sized = .{ .ref = .tmp8, .size = .dword };
+                const tmp8p: Sized = .{ .ref = .tmp8, .size = .ptr };
+                const tmp8q: Sized = .{ .ref = .tmp8, .size = .qword };
+                const tmp8t: Sized = .{ .ref = .tmp8, .size = .tbyte };
+                const tmp8x: Sized = .{ .ref = .tmp8, .size = .xword };
+                const tmp8y: Sized = .{ .ref = .tmp8, .size = .yword };
+
                 const dst0: Sized = .{ .ref = .dst0, .size = .none };
                 const dst0b: Sized = .{ .ref = .dst0, .size = .byte };
                 const dst0w: Sized = .{ .ref = .dst0, .size = .word };
@@ -31875,6 +35124,33 @@ const Select = struct {
         const tmp5x: Select.Operand = .{ .tag = .ref, .base = .tmp5x };
         const tmp5y: Select.Operand = .{ .tag = .ref, .base = .tmp5y };
 
+        const tmp6b: Select.Operand = .{ .tag = .ref, .base = .tmp6b };
+        const tmp6w: Select.Operand = .{ .tag = .ref, .base = .tmp6w };
+        const tmp6d: Select.Operand = .{ .tag = .ref, .base = .tmp6d };
+        const tmp6p: Select.Operand = .{ .tag = .ref, .base = .tmp6p };
+        const tmp6q: Select.Operand = .{ .tag = .ref, .base = .tmp6q };
+        const tmp6t: Select.Operand = .{ .tag = .ref, .base = .tmp6t };
+        const tmp6x: Select.Operand = .{ .tag = .ref, .base = .tmp6x };
+        const tmp6y: Select.Operand = .{ .tag = .ref, .base = .tmp6y };
+
+        const tmp7b: Select.Operand = .{ .tag = .ref, .base = .tmp7b };
+        const tmp7w: Select.Operand = .{ .tag = .ref, .base = .tmp7w };
+        const tmp7d: Select.Operand = .{ .tag = .ref, .base = .tmp7d };
+        const tmp7p: Select.Operand = .{ .tag = .ref, .base = .tmp7p };
+        const tmp7q: Select.Operand = .{ .tag = .ref, .base = .tmp7q };
+        const tmp7t: Select.Operand = .{ .tag = .ref, .base = .tmp7t };
+        const tmp7x: Select.Operand = .{ .tag = .ref, .base = .tmp7x };
+        const tmp7y: Select.Operand = .{ .tag = .ref, .base = .tmp7y };
+
+        const tmp8b: Select.Operand = .{ .tag = .ref, .base = .tmp8b };
+        const tmp8w: Select.Operand = .{ .tag = .ref, .base = .tmp8w };
+        const tmp8d: Select.Operand = .{ .tag = .ref, .base = .tmp8d };
+        const tmp8p: Select.Operand = .{ .tag = .ref, .base = .tmp8p };
+        const tmp8q: Select.Operand = .{ .tag = .ref, .base = .tmp8q };
+        const tmp8t: Select.Operand = .{ .tag = .ref, .base = .tmp8t };
+        const tmp8x: Select.Operand = .{ .tag = .ref, .base = .tmp8x };
+        const tmp8y: Select.Operand = .{ .tag = .ref, .base = .tmp8y };
+
         const dst0b: Select.Operand = .{ .tag = .ref, .base = .dst0b };
         const dst0w: Select.Operand = .{ .tag = .ref, .base = .dst0w };
         const dst0d: Select.Operand = .{ .tag = .ref, .base = .dst0d };
@@ -32204,14 +35480,20 @@ fn select(
             const src_slots = s.temps[@intFromEnum(Select.Operand.Ref.src0)..@intFromEnum(Select.Operand.Ref.none)];
 
             caller_preserved: {
-                switch (switch (case.clobbers.caller_preserved) {
+                const cc = switch (case.clobbers.caller_preserved) {
                     .none => break :caller_preserved,
                     .ccc => cg.target.cCallingConvention().?,
                     .zigcc => .auto,
-                }) {
+                };
+                const err_ret_trace_reg = if (cc == .auto and cg.pt.zcu.comp.config.any_error_tracing) err_ret_trace_reg: { // only for the .auto convention with error tracing enabled
+                    const param_gpr = abi.getCAbiIntParamRegs(.auto);
+                    break :err_ret_trace_reg param_gpr[param_gpr.len - 1]; // the last integer parameter register; presumably it carries the error return trace - confirm against the call lowering
+                } else .none; // otherwise no register is exempted
+                switch (cc) {
                     else => unreachable,
-                    inline .x86_64_sysv, .x86_64_win, .auto => |_, tag| inline for (comptime abi.getCallerPreservedRegs(tag)) |reg| {
-                        const tracked_index = RegisterManager.indexOfKnownRegIntoTracked(reg) orelse continue;
+                    inline .x86_64_sysv, .x86_64_win, .auto => |_, tag| inline for (comptime abi.getCallerPreservedRegs(tag)) |reg| skip: {
+                        if (reg == err_ret_trace_reg) break :skip; // do not claim or lock the exempted register
+                        const tracked_index = RegisterManager.indexOfKnownRegIntoTracked(reg) orelse break :skip; // registers without a tracked index are skipped as well
                         try cg.register_manager.getRegIndex(tracked_index, null);
                         assert(cg.register_manager.lockRegIndexAssumeUnused(tracked_index).tracked_index == tracked_index);
                     },
@@ -32220,7 +35502,9 @@ fn select(
 
             @memcpy(src_slots[0..src_temps.len], src_temps);
             std.mem.swap(Temp, &src_slots[pattern.commute[0]], &src_slots[pattern.commute[1]]);
-            for (tmp_slots, case.extra_temps) |*slot, spec| slot.* = try spec.create(&s) orelse continue;
+            for (1..3) |pass| for (tmp_slots, case.extra_temps) |*slot, spec| if (pass == spec.pass()) { // passes 1 and 2: `.reg` specs are created first, then the rest; `.unused` specs (pass 0) never match
+                slot.* = try spec.create(&s);
+            };
 
             while (true) for (pattern.src[0..src_temps.len], src_temps) |src_pattern, *src_temp| {
                 if (try src_pattern.convert(src_temp, cg)) break;
@@ -32231,7 +35515,7 @@ fn select(
             if (case.clobbers.eflags) try cg.spillEflagsIfOccupied();
 
             for (dst_temps, dst_tys, case.dst_temps[0..dst_temps.len]) |*dst_temp, dst_ty, dst_kind|
-                dst_temp.* = (try Select.TempSpec.create(.{ .type = dst_ty, .kind = dst_kind }, &s)).?;
+                dst_temp.* = (try Select.TempSpec.create(.{ .type = dst_ty, .kind = dst_kind }, &s));
             @memcpy(dst_slots[0..dst_temps.len], dst_temps);
 
             switch (case.each) {
@@ -32243,16 +35527,20 @@ fn select(
             assert(s.top == 0);
 
             caller_preserved: {
-                switch (switch (case.clobbers.caller_preserved) {
+                const cc = switch (case.clobbers.caller_preserved) {
                     .none => break :caller_preserved,
                     .ccc => cg.target.cCallingConvention().?,
                     .zigcc => .auto,
-                }) {
+                };
+                const err_ret_trace_reg = if (cc == .auto and cg.pt.zcu.comp.config.any_error_tracing) err_ret_trace_reg: { // same exemption rule as when the registers were locked earlier in select()
+                    const param_gpr = abi.getCAbiIntParamRegs(.auto);
+                    break :err_ret_trace_reg param_gpr[param_gpr.len - 1]; // the last integer parameter register
+                } else .none; // otherwise no register is exempted
+                switch (cc) {
                     else => unreachable,
-                    inline .x86_64_sysv, .x86_64_win, .auto => |_, tag| inline for (comptime abi.getCallerPreservedRegs(tag)) |reg| {
-                        cg.register_manager.unlockReg(.{
-                            .tracked_index = RegisterManager.indexOfKnownRegIntoTracked(reg) orelse continue,
-                        });
+                    inline .x86_64_sysv, .x86_64_win, .auto => |_, tag| inline for (comptime abi.getCallerPreservedRegs(tag)) |reg| skip: {
+                        if (reg == err_ret_trace_reg) break :skip; // this register was not locked above, so it must not be unlocked here
+                        cg.register_manager.unlockReg(.{ .tracked_index = RegisterManager.indexOfKnownRegIntoTracked(reg) orelse break :skip }); // registers without a tracked index are skipped as well
                     },
                 }
             }
src/codegen/c/Type.zig
@@ -1289,26 +1289,26 @@ pub const Pool = struct {
         kind: Kind,
     ) !CType {
         switch (int_info.bits) {
-            0 => return CType.void,
+            0 => return .void,
             1...8 => switch (int_info.signedness) {
-                .signed => return CType.i8,
-                .unsigned => return CType.u8,
+                .signed => return .i8,
+                .unsigned => return .u8,
             },
             9...16 => switch (int_info.signedness) {
-                .signed => return CType.i16,
-                .unsigned => return CType.u16,
+                .signed => return .i16,
+                .unsigned => return .u16,
             },
             17...32 => switch (int_info.signedness) {
-                .signed => return CType.i32,
-                .unsigned => return CType.u32,
+                .signed => return .i32,
+                .unsigned => return .u32,
             },
             33...64 => switch (int_info.signedness) {
-                .signed => return CType.i64,
-                .unsigned => return CType.u64,
+                .signed => return .i64,
+                .unsigned => return .u64,
             },
             65...128 => switch (int_info.signedness) {
-                .signed => return CType.i128,
-                .unsigned => return CType.u128,
+                .signed => return .i128,
+                .unsigned => return .u128,
             },
             else => {
                 const target = &mod.resolved_target.result;
@@ -1357,19 +1357,19 @@ pub const Pool = struct {
             .null_type,
             .undefined_type,
             .enum_literal_type,
-            => return CType.void,
-            .u1_type, .u8_type => return CType.u8,
-            .i8_type => return CType.i8,
-            .u16_type => return CType.u16,
-            .i16_type => return CType.i16,
-            .u29_type, .u32_type => return CType.u32,
-            .i32_type => return CType.i32,
-            .u64_type => return CType.u64,
-            .i64_type => return CType.i64,
-            .u80_type, .u128_type => return CType.u128,
-            .i128_type => return CType.i128,
-            .usize_type => return CType.usize,
-            .isize_type => return CType.isize,
+            => return .void,
+            .u1_type, .u8_type => return .u8,
+            .i8_type => return .i8,
+            .u16_type => return .u16,
+            .i16_type => return .i16,
+            .u29_type, .u32_type => return .u32,
+            .i32_type => return .i32,
+            .u64_type => return .u64,
+            .i64_type => return .i64,
+            .u80_type, .u128_type => return .u128,
+            .i128_type => return .i128,
+            .usize_type => return .usize,
+            .isize_type => return .isize,
             .c_char_type => return .{ .index = .char },
             .c_short_type => return .{ .index = .short },
             .c_ushort_type => return .{ .index = .@"unsigned short" },
@@ -1380,12 +1380,12 @@ pub const Pool = struct {
             .c_longlong_type => return .{ .index = .@"long long" },
             .c_ulonglong_type => return .{ .index = .@"unsigned long long" },
             .c_longdouble_type => return .{ .index = .@"long double" },
-            .f16_type => return CType.f16,
-            .f32_type => return CType.f32,
-            .f64_type => return CType.f64,
-            .f80_type => return CType.f80,
-            .f128_type => return CType.f128,
-            .bool_type, .optional_noreturn_type => return CType.bool,
+            .f16_type => return .f16,
+            .f32_type => return .f32,
+            .f64_type => return .f64,
+            .f80_type => return .f80,
+            .f128_type => return .f128,
+            .bool_type, .optional_noreturn_type => return .bool,
             .noreturn_type,
             .anyframe_type,
             .generic_poison_type,
@@ -1397,19 +1397,20 @@ pub const Pool = struct {
                 .signedness = .unsigned,
                 .bits = pt.zcu.errorSetBits(),
             }, mod, kind),
+
             .manyptr_u8_type,
             => return pool.getPointer(allocator, .{
-                .elem_ctype = CType.u8,
+                .elem_ctype = .u8,
             }),
             .manyptr_const_u8_type,
             .manyptr_const_u8_sentinel_0_type,
             => return pool.getPointer(allocator, .{
-                .elem_ctype = CType.u8,
+                .elem_ctype = .u8,
                 .@"const" = true,
             }),
             .single_const_pointer_to_comptime_int_type,
             => return pool.getPointer(allocator, .{
-                .elem_ctype = CType.void,
+                .elem_ctype = .void,
                 .@"const" = true,
             }),
             .slice_const_u8_type,
@@ -1420,14 +1421,14 @@ pub const Pool = struct {
                     .{
                         .name = .{ .index = .ptr },
                         .ctype = try pool.getPointer(allocator, .{
-                            .elem_ctype = CType.u8,
+                            .elem_ctype = .u8,
                             .@"const" = true,
                         }),
                         .alignas = AlignAs.fromAbiAlignment(Type.ptrAbiAlignment(target.*)),
                     },
                     .{
                         .name = .{ .index = .len },
-                        .ctype = CType.usize,
+                        .ctype = .usize,
                         .alignas = AlignAs.fromAbiAlignment(
                             Type.intAbiAlignment(target.ptrBitWidth(), target.*),
                         ),
@@ -1436,6 +1437,97 @@ pub const Pool = struct {
                 return pool.fromFields(allocator, .@"struct", &fields, kind);
             },
 
+            .vector_4_f16_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .f16,
+                    .len = 4,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.f16.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_8_f16_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .f16,
+                    .len = 8,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.f16.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_4_f32_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .f32,
+                    .len = 4,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.f32.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_8_f32_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .f32,
+                    .len = 8,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.f32.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_2_f64_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .f64,
+                    .len = 2,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.f64.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_4_f64_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .f64,
+                    .len = 4,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.f64.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+
             .undef,
             .zero,
             .zero_usize,
@@ -1521,7 +1613,7 @@ pub const Pool = struct {
                             },
                             .{
                                 .name = .{ .index = .len },
-                                .ctype = CType.usize,
+                                .ctype = .usize,
                                 .alignas = AlignAs.fromAbiAlignment(
                                     Type.intAbiAlignment(target.ptrBitWidth(), target.*),
                                 ),
@@ -1532,7 +1624,7 @@ pub const Pool = struct {
                 },
                 .array_type => |array_info| {
                     const len = array_info.lenIncludingSentinel();
-                    if (len == 0) return CType.void;
+                    if (len == 0) return .void;
                     const elem_type = Type.fromInterned(array_info.child);
                     const elem_ctype = try pool.fromType(
                         allocator,
@@ -1542,7 +1634,7 @@ pub const Pool = struct {
                         mod,
                         kind.noParameter(),
                     );
-                    if (elem_ctype.index == .void) return CType.void;
+                    if (elem_ctype.index == .void) return .void;
                     const array_ctype = try pool.getArray(allocator, .{
                         .elem_ctype = elem_ctype,
                         .len = len,
@@ -1558,7 +1650,7 @@ pub const Pool = struct {
                     return pool.fromFields(allocator, .@"struct", &fields, kind);
                 },
                 .vector_type => |vector_info| {
-                    if (vector_info.len == 0) return CType.void;
+                    if (vector_info.len == 0) return .void;
                     const elem_type = Type.fromInterned(vector_info.child);
                     const elem_ctype = try pool.fromType(
                         allocator,
@@ -1568,7 +1660,7 @@ pub const Pool = struct {
                         mod,
                         kind.noParameter(),
                     );
-                    if (elem_ctype.index == .void) return CType.void;
+                    if (elem_ctype.index == .void) return .void;
                     const vector_ctype = try pool.getVector(allocator, .{
                         .elem_ctype = elem_ctype,
                         .len = vector_info.len,
@@ -1584,7 +1676,7 @@ pub const Pool = struct {
                     return pool.fromFields(allocator, .@"struct", &fields, kind);
                 },
                 .opt_type => |payload_type| {
-                    if (ip.isNoReturn(payload_type)) return CType.void;
+                    if (ip.isNoReturn(payload_type)) return .void;
                     const payload_ctype = try pool.fromType(
                         allocator,
                         scratch,
@@ -1593,7 +1685,7 @@ pub const Pool = struct {
                         mod,
                         kind.noParameter(),
                     );
-                    if (payload_ctype.index == .void) return CType.bool;
+                    if (payload_ctype.index == .void) return .bool;
                     switch (payload_type) {
                         .anyerror_type => return payload_ctype,
                         else => switch (ip.indexToKey(payload_type)) {
@@ -1606,7 +1698,7 @@ pub const Pool = struct {
                     var fields = [_]Info.Field{
                         .{
                             .name = .{ .index = .is_null },
-                            .ctype = CType.bool,
+                            .ctype = .bool,
                             .alignas = AlignAs.fromAbiAlignment(.@"1"),
                         },
                         .{
@@ -1666,7 +1758,7 @@ pub const Pool = struct {
                             if (kind.isForward()) return if (ty.hasRuntimeBitsIgnoreComptime(zcu))
                                 fwd_decl
                             else
-                                CType.void;
+                                .void;
                             const scratch_top = scratch.items.len;
                             defer scratch.shrinkRetainingCapacity(scratch_top);
                             try scratch.ensureUnusedCapacity(
@@ -1710,7 +1802,7 @@ pub const Pool = struct {
                                 scratch.items.len - scratch_top,
                                 @typeInfo(Field).@"struct".fields.len,
                             ));
-                            if (fields_len == 0) return CType.void;
+                            if (fields_len == 0) return .void;
                             try pool.ensureUnusedCapacity(allocator, 1);
                             const extra_index = try pool.addHashedExtra(allocator, &hasher, Aggregate, .{
                                 .fwd_decl = fwd_decl.index,
@@ -1762,7 +1854,7 @@ pub const Pool = struct {
                         scratch.items.len - scratch_top,
                         @typeInfo(Field).@"struct".fields.len,
                     ));
-                    if (fields_len == 0) return CType.void;
+                    if (fields_len == 0) return .void;
                     if (kind.isForward()) {
                         try pool.ensureUnusedCapacity(allocator, 1);
                         const extra_index = try pool.addHashedExtra(
@@ -1801,7 +1893,7 @@ pub const Pool = struct {
                             if (kind.isForward()) return if (ty.hasRuntimeBitsIgnoreComptime(zcu))
                                 fwd_decl
                             else
-                                CType.void;
+                                .void;
                             const loaded_tag = loaded_union.loadTagType(ip);
                             const scratch_top = scratch.items.len;
                             defer scratch.shrinkRetainingCapacity(scratch_top);
@@ -1848,7 +1940,7 @@ pub const Pool = struct {
                                 @typeInfo(Field).@"struct".fields.len,
                             ));
                             if (!has_tag) {
-                                if (fields_len == 0) return CType.void;
+                                if (fields_len == 0) return .void;
                                 try pool.ensureUnusedCapacity(allocator, 1);
                                 const extra_index = try pool.addHashedExtra(
                                     allocator,
@@ -1915,7 +2007,7 @@ pub const Pool = struct {
                                     struct_fields_len += 1;
                                 }
                             }
-                            if (struct_fields_len == 0) return CType.void;
+                            if (struct_fields_len == 0) return .void;
                             sortFields(struct_fields[0..struct_fields_len]);
                             return pool.getAggregate(allocator, .{
                                 .tag = .@"struct",
@@ -1929,7 +2021,7 @@ pub const Pool = struct {
                         }, mod, kind),
                     }
                 },
-                .opaque_type => return CType.void,
+                .opaque_type => return .void,
                 .enum_type => return pool.fromType(
                     allocator,
                     scratch,
@@ -1938,7 +2030,7 @@ pub const Pool = struct {
                     mod,
                     kind,
                 ),
-                .func_type => |func_info| if (func_info.is_generic) return CType.void else {
+                .func_type => |func_info| if (func_info.is_generic) return .void else {
                     const scratch_top = scratch.items.len;
                     defer scratch.shrinkRetainingCapacity(scratch_top);
                     try scratch.ensureUnusedCapacity(allocator, func_info.param_types.len);
@@ -1952,7 +2044,7 @@ pub const Pool = struct {
                         pt,
                         mod,
                         kind.asParameter(),
-                    ) else CType.void;
+                    ) else .void;
                     for (0..func_info.param_types.len) |param_index| {
                         const param_type = Type.fromInterned(
                             func_info.param_types.get(ip)[param_index],
@@ -2033,7 +2125,7 @@ pub const Pool = struct {
             pub fn eql(map_adapter: @This(), _: CType, _: void, pool_index: usize) bool {
                 return map_adapter.source_info.eqlAdapted(
                     map_adapter.source_pool,
-                    CType.fromPoolIndex(pool_index),
+                    .fromPoolIndex(pool_index),
                     map_adapter.pool,
                     map_adapter.pool_adapter,
                 );
@@ -2047,7 +2139,7 @@ pub const Pool = struct {
             .pool_adapter = pool_adapter,
         });
         errdefer _ = pool.map.pop();
-        const ctype = CType.fromPoolIndex(gop.index);
+        const ctype: CType = .fromPoolIndex(gop.index);
         if (!gop.found_existing) switch (source_info) {
             .basic => unreachable,
             .pointer => |pointer_info| pool.items.appendAssumeCapacity(.{
@@ -2232,7 +2324,7 @@ pub const Pool = struct {
             CTypeAdapter{ .pool = pool },
         );
         if (!gop.found_existing) pool.items.appendAssumeCapacity(.{ .tag = tag, .data = data });
-        return CType.fromPoolIndex(gop.index);
+        return .fromPoolIndex(gop.index);
     }
 
     fn tagExtra(
@@ -2290,7 +2382,7 @@ pub const Pool = struct {
             pool.extra.shrinkRetainingCapacity(extra_index)
         else
             pool.items.appendAssumeCapacity(.{ .tag = tag, .data = extra_index });
-        return CType.fromPoolIndex(gop.index);
+        return .fromPoolIndex(gop.index);
     }
 
     fn sortFields(fields: []Info.Field) void {
src/codegen/c.zig
@@ -706,7 +706,7 @@ pub const DeclGen = struct {
         const uav_ty = uav_val.typeOf(zcu);
 
         // Render an undefined pointer if we have a pointer to a zero-bit or comptime type.
-        const ptr_ty = Type.fromInterned(uav.orig_ty);
+        const ptr_ty: Type = .fromInterned(uav.orig_ty);
         if (ptr_ty.isPtrAtRuntime(zcu) and !uav_ty.isFnOrHasRuntimeBits(zcu)) {
             return dg.writeCValue(writer, .{ .undef = ptr_ty });
         }
@@ -782,7 +782,7 @@ pub const DeclGen = struct {
         };
 
         // Render an undefined pointer if we have a pointer to a zero-bit or comptime type.
-        const nav_ty = Type.fromInterned(ip.getNav(owner_nav).typeOf(ip));
+        const nav_ty: Type = .fromInterned(ip.getNav(owner_nav).typeOf(ip));
         const ptr_ty = try pt.navPtrType(owner_nav);
         if (!nav_ty.isFnOrHasRuntimeBits(zcu)) {
             return dg.writeCValue(writer, .{ .undef = ptr_ty });
@@ -819,7 +819,7 @@ pub const DeclGen = struct {
             .comptime_alloc_ptr, .comptime_field_ptr => unreachable,
             .int => |int| {
                 const ptr_ctype = try dg.ctypeFromType(int.ptr_ty, .complete);
-                const addr_val = try pt.intValue(Type.usize, int.addr);
+                const addr_val = try pt.intValue(.usize, int.addr);
                 try writer.writeByte('(');
                 try dg.renderCType(writer, ptr_ctype);
                 try writer.print("){x}", .{try dg.fmtIntLiteral(addr_val, .Other)});
@@ -859,7 +859,7 @@ pub const DeclGen = struct {
                         try writer.writeByte('(');
                         try dg.renderCType(writer, ptr_ctype);
                         try writer.writeByte(')');
-                        const offset_val = try pt.intValue(Type.usize, byte_offset);
+                        const offset_val = try pt.intValue(.usize, byte_offset);
                         try writer.writeAll("((char *)");
                         try dg.renderPointer(writer, field.parent.*, location);
                         try writer.print(" + {})", .{try dg.fmtIntLiteral(offset_val, .Other)});
@@ -875,7 +875,7 @@ pub const DeclGen = struct {
                 try writer.writeByte(')');
                 try dg.renderPointer(writer, elem.parent.*, location);
             } else {
-                const index_val = try pt.intValue(Type.usize, elem.elem_idx);
+                const index_val = try pt.intValue(.usize, elem.elem_idx);
                 // We want to do pointer arithmetic on a pointer to the element type.
                 // We might have a pointer-to-array. In this case, we must cast first.
                 const result_ctype = try dg.ctypeFromType(elem.result_ptr_ty, .complete);
@@ -904,7 +904,7 @@ pub const DeclGen = struct {
                 if (oac.byte_offset == 0) {
                     try dg.renderPointer(writer, oac.parent.*, location);
                 } else {
-                    const offset_val = try pt.intValue(Type.usize, oac.byte_offset);
+                    const offset_val = try pt.intValue(.usize, oac.byte_offset);
                     try writer.writeAll("((char *)");
                     try dg.renderPointer(writer, oac.parent.*, location);
                     try writer.print(" + {})", .{try dg.fmtIntLiteral(offset_val, .Other)});
@@ -984,7 +984,7 @@ pub const DeclGen = struct {
                     try writer.writeAll("((");
                     try dg.renderCType(writer, ctype);
                     try writer.print("){x})", .{try dg.fmtIntLiteral(
-                        try pt.intValue(Type.usize, val.toUnsignedInt(zcu)),
+                        try pt.intValue(.usize, val.toUnsignedInt(zcu)),
                         .Other,
                     )});
                 },
@@ -1153,7 +1153,7 @@ pub const DeclGen = struct {
                     else => |payload| switch (ip.indexToKey(payload)) {
                         .undef => |err_ty| try dg.renderUndefValue(
                             writer,
-                            Type.fromInterned(err_ty),
+                            .fromInterned(err_ty),
                             location,
                         ),
                         .err => |err| try dg.renderErrorName(writer, err.name),
@@ -1204,7 +1204,7 @@ pub const DeclGen = struct {
                                 ),
                             },
                             .ptr => try writer.writeAll("NULL"),
-                            .len => try dg.renderUndefValue(writer, Type.usize, initializer_type),
+                            .len => try dg.renderUndefValue(writer, .usize, initializer_type),
                             else => unreachable,
                         }
                     }
@@ -1219,7 +1219,7 @@ pub const DeclGen = struct {
                         try writer.writeByte(')');
                     }
                     const ai = ty.arrayInfo(zcu);
-                    if (ai.elem_type.eql(Type.u8, zcu)) {
+                    if (ai.elem_type.eql(.u8, zcu)) {
                         var literal = stringLiteral(writer, ty.arrayLenIncludingSentinel(zcu));
                         try literal.start();
                         var index: usize = 0;
@@ -1263,7 +1263,7 @@ pub const DeclGen = struct {
                     for (0..tuple.types.len) |field_index| {
                         const comptime_val = tuple.values.get(ip)[field_index];
                         if (comptime_val != .none) continue;
-                        const field_ty = Type.fromInterned(tuple.types.get(ip)[field_index]);
+                        const field_ty: Type = .fromInterned(tuple.types.get(ip)[field_index]);
                         if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
 
                         if (!empty) try writer.writeByte(',');
@@ -1298,7 +1298,7 @@ pub const DeclGen = struct {
                             var field_it = loaded_struct.iterateRuntimeOrder(ip);
                             var need_comma = false;
                             while (field_it.next()) |field_index| {
-                                const field_ty = Type.fromInterned(loaded_struct.field_types.get(ip)[field_index]);
+                                const field_ty: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]);
                                 if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
 
                                 if (need_comma) try writer.writeByte(',');
@@ -1325,7 +1325,7 @@ pub const DeclGen = struct {
                             var eff_num_fields: usize = 0;
 
                             for (0..loaded_struct.field_types.len) |field_index| {
-                                const field_ty = Type.fromInterned(loaded_struct.field_types.get(ip)[field_index]);
+                                const field_ty: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]);
                                 if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
                                 eff_num_fields += 1;
                             }
@@ -1346,7 +1346,7 @@ pub const DeclGen = struct {
                                 var eff_index: usize = 0;
                                 var needs_closing_paren = false;
                                 for (0..loaded_struct.field_types.len) |field_index| {
-                                    const field_ty = Type.fromInterned(loaded_struct.field_types.get(ip)[field_index]);
+                                    const field_ty: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]);
                                     if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
 
                                     const field_val = switch (ip.indexToKey(val.toIntern()).aggregate.storage) {
@@ -1382,7 +1382,7 @@ pub const DeclGen = struct {
                                 // a << a_off | b << b_off | c << c_off
                                 var empty = true;
                                 for (0..loaded_struct.field_types.len) |field_index| {
-                                    const field_ty = Type.fromInterned(loaded_struct.field_types.get(ip)[field_index]);
+                                    const field_ty: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]);
                                     if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
 
                                     if (!empty) try writer.writeAll(" | ");
@@ -1466,7 +1466,7 @@ pub const DeclGen = struct {
                     }
 
                     const field_index = zcu.unionTagFieldIndex(loaded_union, Value.fromInterned(un.tag)).?;
-                    const field_ty = Type.fromInterned(loaded_union.field_types.get(ip)[field_index]);
+                    const field_ty: Type = .fromInterned(loaded_union.field_types.get(ip)[field_index]);
                     const field_name = loaded_union.loadTagType(ip).names.get(ip)[field_index];
                     if (loaded_union.flagsUnordered(ip).layout == .@"packed") {
                         if (field_ty.hasRuntimeBits(zcu)) {
@@ -1509,7 +1509,7 @@ pub const DeclGen = struct {
                                     );
                                     try writer.writeByte(' ');
                                 } else for (0..loaded_union.field_types.len) |inner_field_index| {
-                                    const inner_field_ty = Type.fromInterned(
+                                    const inner_field_ty: Type = .fromInterned(
                                         loaded_union.field_types.get(ip)[inner_field_index],
                                     );
                                     if (!inner_field_ty.hasRuntimeBits(zcu)) continue;
@@ -1592,7 +1592,7 @@ pub const DeclGen = struct {
                         try writer.writeAll("((");
                         try dg.renderCType(writer, ctype);
                         return writer.print("){x})", .{
-                            try dg.fmtIntLiteral(try pt.undefValue(Type.usize), .Other),
+                            try dg.fmtIntLiteral(try pt.undefValue(.usize), .Other),
                         });
                     },
                     .slice => {
@@ -1606,14 +1606,14 @@ pub const DeclGen = struct {
                         const ptr_ty = ty.slicePtrFieldType(zcu);
                         try dg.renderType(writer, ptr_ty);
                         return writer.print("){x}, {0x}}}", .{
-                            try dg.fmtIntLiteral(try dg.pt.undefValue(Type.usize), .Other),
+                            try dg.fmtIntLiteral(try dg.pt.undefValue(.usize), .Other),
                         });
                     },
                 },
                 .opt_type => |child_type| switch (ctype.info(ctype_pool)) {
                     .basic, .pointer => try dg.renderUndefValue(
                         writer,
-                        Type.fromInterned(if (ctype.isBool()) .bool_type else child_type),
+                        .fromInterned(if (ctype.isBool()) .bool_type else child_type),
                         location,
                     ),
                     .aligned, .array, .vector, .fwd_decl, .function => unreachable,
@@ -1622,7 +1622,7 @@ pub const DeclGen = struct {
                             .is_null, .payload => {},
                             .ptr, .len => return dg.renderUndefValue(
                                 writer,
-                                Type.fromInterned(child_type),
+                                .fromInterned(child_type),
                                 location,
                             ),
                             else => unreachable,
@@ -1635,7 +1635,7 @@ pub const DeclGen = struct {
                         try writer.writeByte('{');
                         for (0..aggregate.fields.len) |field_index| {
                             if (field_index > 0) try writer.writeByte(',');
-                            try dg.renderUndefValue(writer, Type.fromInterned(
+                            try dg.renderUndefValue(writer, .fromInterned(
                                 switch (aggregate.fields.at(field_index, ctype_pool).name.index) {
                                     .is_null => .bool_type,
                                     .payload => child_type,
@@ -1660,7 +1660,7 @@ pub const DeclGen = struct {
                             var field_it = loaded_struct.iterateRuntimeOrder(ip);
                             var need_comma = false;
                             while (field_it.next()) |field_index| {
-                                const field_ty = Type.fromInterned(loaded_struct.field_types.get(ip)[field_index]);
+                                const field_ty: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]);
                                 if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
 
                                 if (need_comma) try writer.writeByte(',');
@@ -1685,7 +1685,7 @@ pub const DeclGen = struct {
                     var need_comma = false;
                     for (0..tuple_info.types.len) |field_index| {
                         if (tuple_info.values.get(ip)[field_index] != .none) continue;
-                        const field_ty = Type.fromInterned(tuple_info.types.get(ip)[field_index]);
+                        const field_ty: Type = .fromInterned(tuple_info.types.get(ip)[field_index]);
                         if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
 
                         if (need_comma) try writer.writeByte(',');
@@ -1715,13 +1715,13 @@ pub const DeclGen = struct {
                                     .payload) {
                                     .tag => try dg.renderUndefValue(
                                         writer,
-                                        Type.fromInterned(loaded_union.enum_tag_ty),
+                                        .fromInterned(loaded_union.enum_tag_ty),
                                         initializer_type,
                                     ),
                                     .payload => {
                                         try writer.writeByte('{');
                                         for (0..loaded_union.field_types.len) |inner_field_index| {
-                                            const inner_field_ty = Type.fromInterned(
+                                            const inner_field_ty: Type = .fromInterned(
                                                 loaded_union.field_types.get(ip)[inner_field_index],
                                             );
                                             if (!inner_field_ty.hasRuntimeBits(pt.zcu)) continue;
@@ -1747,7 +1747,7 @@ pub const DeclGen = struct {
                 .error_union_type => |error_union_type| switch (ctype.info(ctype_pool)) {
                     .basic => try dg.renderUndefValue(
                         writer,
-                        Type.fromInterned(error_union_type.error_set_type),
+                        .fromInterned(error_union_type.error_set_type),
                         location,
                     ),
                     .pointer, .aligned, .array, .vector, .fwd_decl, .function => unreachable,
@@ -1762,7 +1762,7 @@ pub const DeclGen = struct {
                             if (field_index > 0) try writer.writeByte(',');
                             try dg.renderUndefValue(
                                 writer,
-                                Type.fromInterned(
+                                .fromInterned(
                                     switch (aggregate.fields.at(field_index, ctype_pool).name.index) {
                                         .@"error" => error_union_type.error_set_type,
                                         .payload => error_union_type.payload_type,
@@ -1777,7 +1777,7 @@ pub const DeclGen = struct {
                 },
                 .array_type, .vector_type => {
                     const ai = ty.arrayInfo(zcu);
-                    if (ai.elem_type.eql(Type.u8, zcu)) {
+                    if (ai.elem_type.eql(.u8, zcu)) {
                         const c_len = ty.arrayLenIncludingSentinel(zcu);
                         var literal = stringLiteral(writer, c_len);
                         try literal.start();
@@ -1981,8 +1981,8 @@ pub const DeclGen = struct {
 
         const src_is_ptr = src_ty.isPtrAtRuntime(pt.zcu);
         const src_eff_ty: Type = if (src_is_ptr) switch (dest_int_info.signedness) {
-            .unsigned => Type.usize,
-            .signed => Type.isize,
+            .unsigned => .usize,
+            .signed => .isize,
         } else src_ty;
 
         const src_bits = src_eff_ty.bitSize(zcu);
@@ -2022,8 +2022,8 @@ pub const DeclGen = struct {
 
         const src_is_ptr = src_ty.isPtrAtRuntime(zcu);
         const src_eff_ty: Type = if (src_is_ptr) switch (dest_int_info.signedness) {
-            .unsigned => Type.usize,
-            .signed => Type.isize,
+            .unsigned => .usize,
+            .signed => .isize,
         } else src_ty;
 
         const src_bits = src_eff_ty.bitSize(zcu);
@@ -2249,7 +2249,7 @@ pub const DeclGen = struct {
         if (flags.is_threadlocal and !dg.mod.single_threaded) try fwd.writeAll("zig_threadlocal ");
         try dg.renderTypeAndName(
             fwd,
-            Type.fromInterned(nav.typeOf(ip)),
+            .fromInterned(nav.typeOf(ip)),
             .{ .nav = nav_index },
             CQualifiers.init(.{ .@"const" = flags.is_const }),
             nav.getAlignment(),
@@ -2321,7 +2321,7 @@ pub const DeclGen = struct {
 
         if (is_big) try writer.print(", {}", .{int_info.signedness == .signed});
         try writer.print(", {}", .{try dg.fmtIntLiteral(
-            try pt.intValue(if (is_big) Type.u16 else Type.u8, int_info.bits),
+            try pt.intValue(if (is_big) .u16 else .u8, int_info.bits),
             .FunctionArgument,
         )});
     }
@@ -2657,7 +2657,7 @@ pub fn genTypeDecl(
             },
             .index => |index| if (!found_existing) {
                 const ip = &zcu.intern_pool;
-                const ty = Type.fromInterned(index);
+                const ty: Type = .fromInterned(index);
                 _ = try renderTypePrefix(.flush, global_ctype_pool, zcu, writer, global_ctype, .suffix, .{});
                 try writer.writeByte(';');
                 const file_scope = ty.typeDeclInstAllowGeneratedTag(zcu).?.resolveFile(ip);
@@ -2772,7 +2772,7 @@ pub fn genErrDecls(o: *Object) !void {
         if (val > 1) try writer.writeAll(", ");
         try writer.print("{{" ++ name_prefix ++ "{}, {}}}", .{
             fmtIdent(name),
-            try o.dg.fmtIntLiteral(try pt.intValue(Type.usize, name.len), .StaticInitializer),
+            try o.dg.fmtIntLiteral(try pt.intValue(.usize, name.len), .StaticInitializer),
         });
     }
     try writer.writeAll("};\n");
@@ -2788,8 +2788,8 @@ pub fn genLazyFn(o: *Object, lazy_ctype_pool: *const CType.Pool, lazy_fn: LazyFn
     const val = lazy_fn.value_ptr;
     switch (key) {
         .tag_name => |enum_ty_ip| {
-            const enum_ty = Type.fromInterned(enum_ty_ip);
-            const name_slice_ty = Type.slice_const_u8_sentinel_0;
+            const enum_ty: Type = .fromInterned(enum_ty_ip);
+            const name_slice_ty: Type = .slice_const_u8_sentinel_0;
 
             try w.writeAll("static ");
             try o.dg.renderType(w, name_slice_ty);
@@ -2822,7 +2822,7 @@ pub fn genLazyFn(o: *Object, lazy_ctype_pool: *const CType.Pool, lazy_fn: LazyFn
                 try o.dg.renderType(w, name_slice_ty);
                 try w.print("){{{}, {}}};\n", .{
                     fmtIdent("name"),
-                    try o.dg.fmtIntLiteral(try pt.intValue(Type.usize, tag_name_len), .Other),
+                    try o.dg.fmtIntLiteral(try pt.intValue(.usize, tag_name_len), .Other),
                 });
 
                 try w.writeAll("  }\n");
@@ -2966,7 +2966,7 @@ pub fn genDecl(o: *Object) !void {
     const zcu = pt.zcu;
     const ip = &zcu.intern_pool;
     const nav = ip.getNav(o.dg.pass.nav);
-    const nav_ty = Type.fromInterned(nav.typeOf(ip));
+    const nav_ty: Type = .fromInterned(nav.typeOf(ip));
 
     if (!nav_ty.isFnOrHasRuntimeBitsIgnoreComptime(zcu)) return;
     switch (ip.indexToKey(nav.status.fully_resolved.val)) {
@@ -3717,7 +3717,7 @@ fn airArg(f: *Function, inst: Air.Inst.Index) !CValue {
     if (f.liveness.isUnused(inst)) {
         const writer = f.object.writer();
         try writer.writeByte('(');
-        try f.renderType(writer, Type.void);
+        try f.renderType(writer, .void);
         try writer.writeByte(')');
         try f.writeCValue(writer, result, .Other);
         try writer.writeAll(";\n");
@@ -3735,7 +3735,7 @@ fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue {
     const ptr_ty = f.typeOf(ty_op.operand);
     const ptr_scalar_ty = ptr_ty.scalarType(zcu);
     const ptr_info = ptr_scalar_ty.ptrInfo(zcu);
-    const src_ty = Type.fromInterned(ptr_info.child);
+    const src_ty: Type = .fromInterned(ptr_info.child);
 
     if (!src_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
         try reap(f, inst, &.{ty_op.operand});
@@ -3953,7 +3953,7 @@ fn airTrunc(f: *Function, inst: Air.Inst.Index) !CValue {
         .signed => {
             const c_bits = toCIntBits(scalar_int_info.bits) orelse
                 return f.fail("TODO: C backend: implement integer types larger than 128 bits", .{});
-            const shift_val = try pt.intValue(Type.u8, c_bits - dest_bits);
+            const shift_val = try pt.intValue(.u8, c_bits - dest_bits);
 
             try writer.writeAll("zig_shr_");
             try f.object.dg.renderTypeForBuiltinFnName(writer, scalar_ty);
@@ -4019,7 +4019,7 @@ fn airStore(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue {
             try writer.writeAll("memset(");
             try f.writeCValue(writer, ptr_val, .FunctionArgument);
             try writer.writeAll(", 0xaa, sizeof(");
-            try f.renderType(writer, Type.fromInterned(ptr_info.child));
+            try f.renderType(writer, .fromInterned(ptr_info.child));
             try writer.writeAll("));\n");
         }
         return .none;
@@ -4029,7 +4029,7 @@ fn airStore(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue {
         ptr_info.flags.alignment.order(src_ty.abiAlignment(zcu)).compare(.gte)
     else
         true;
-    const is_array = lowersToArray(Type.fromInterned(ptr_info.child), pt);
+    const is_array = lowersToArray(.fromInterned(ptr_info.child), pt);
     const need_memcpy = !is_aligned or is_array;
 
     const src_val = try f.resolveInst(bin_op.rhs);
@@ -4040,7 +4040,7 @@ fn airStore(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue {
     if (need_memcpy) {
         // For this memcpy to safely work we need the rhs to have the same
         // underlying type as the lhs (i.e. they must both be arrays of the same underlying type).
-        assert(src_ty.eql(Type.fromInterned(ptr_info.child), zcu));
+        assert(src_ty.eql(.fromInterned(ptr_info.child), zcu));
 
         // If the source is a constant, writeCValue will emit a brace initialization
         // so work around this by initializing into new local.
@@ -4120,7 +4120,7 @@ fn airStore(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue {
 
         if (src_ty.isPtrAtRuntime(zcu)) {
             try writer.writeByte('(');
-            try f.renderType(writer, Type.usize);
+            try f.renderType(writer, .usize);
             try writer.writeByte(')');
         }
         try f.writeCValue(writer, src_val, .Other);
@@ -4343,8 +4343,8 @@ fn airEquality(
     try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });
 
     const writer = f.object.writer();
-    const local = try f.allocLocal(inst, Type.bool);
-    const a = try Assignment.start(f, writer, CType.bool);
+    const local = try f.allocLocal(inst, .bool);
+    const a = try Assignment.start(f, writer, .bool);
     try f.writeCValue(writer, local, .Other);
     try a.assign(f, writer);
 
@@ -4401,7 +4401,7 @@ fn airCmpLtErrorsLen(f: *Function, inst: Air.Inst.Index) !CValue {
     try reap(f, inst, &.{un_op});
 
     const writer = f.object.writer();
-    const local = try f.allocLocal(inst, Type.bool);
+    const local = try f.allocLocal(inst, .bool);
     try f.writeCValue(writer, local, .Other);
     try writer.writeAll(" = ");
     try f.writeCValue(writer, operand, .Other);
@@ -4517,7 +4517,7 @@ fn airSlice(f: *Function, inst: Air.Inst.Index) !CValue {
         try a.end(f, writer);
     }
     {
-        const a = try Assignment.start(f, writer, CType.usize);
+        const a = try Assignment.start(f, writer, .usize);
         try f.writeCValueMember(writer, local, .{ .identifier = "len" });
         try a.assign(f, writer);
         try f.writeCValue(writer, len, .Initializer);
@@ -4585,9 +4585,9 @@ fn airCall(
         else => unreachable,
     };
     const fn_info = zcu.typeToFunc(if (callee_is_ptr) callee_ty.childType(zcu) else callee_ty).?;
-    const ret_ty = Type.fromInterned(fn_info.return_type);
+    const ret_ty: Type = .fromInterned(fn_info.return_type);
     const ret_ctype: CType = if (ret_ty.isNoReturn(zcu))
-        CType.void
+        .void
     else
         try f.ctypeFromType(ret_ty, .parameter);
 
@@ -4599,7 +4599,7 @@ fn airCall(
             break :result .none;
         } else if (f.liveness.isUnused(inst)) {
             try writer.writeByte('(');
-            try f.renderCType(writer, CType.void);
+            try f.renderCType(writer, .void);
             try writer.writeByte(')');
             break :result .none;
         } else {
@@ -5081,20 +5081,20 @@ fn airBreakpoint(writer: anytype) !CValue {
 
 fn airRetAddr(f: *Function, inst: Air.Inst.Index) !CValue {
     const writer = f.object.writer();
-    const local = try f.allocLocal(inst, Type.usize);
+    const local = try f.allocLocal(inst, .usize);
     try f.writeCValue(writer, local, .Other);
     try writer.writeAll(" = (");
-    try f.renderType(writer, Type.usize);
+    try f.renderType(writer, .usize);
     try writer.writeAll(")zig_return_address();\n");
     return local;
 }
 
 fn airFrameAddress(f: *Function, inst: Air.Inst.Index) !CValue {
     const writer = f.object.writer();
-    const local = try f.allocLocal(inst, Type.usize);
+    const local = try f.allocLocal(inst, .usize);
     try f.writeCValue(writer, local, .Other);
     try writer.writeAll(" = (");
-    try f.renderType(writer, Type.usize);
+    try f.renderType(writer, .usize);
     try writer.writeAll(")zig_frame_address();\n");
     return local;
 }
@@ -5179,10 +5179,10 @@ fn airSwitchBr(f: *Function, inst: Air.Inst.Index, is_dispatch_loop: bool) !void
 
     try writer.writeAll("switch (");
 
-    const lowered_condition_ty = if (condition_ty.toIntern() == .bool_type)
-        Type.u1
+    const lowered_condition_ty: Type = if (condition_ty.toIntern() == .bool_type)
+        .u1
     else if (condition_ty.isPtrAtRuntime(zcu))
-        Type.usize
+        .usize
     else
         condition_ty;
     if (condition_ty.toIntern() != lowered_condition_ty.toIntern()) {
@@ -5219,7 +5219,7 @@ fn airSwitchBr(f: *Function, inst: Air.Inst.Index, is_dispatch_loop: bool) !void
                 }
                 if (condition_ty.isPtrAtRuntime(zcu)) {
                     try writer.writeByte('(');
-                    try f.renderType(writer, Type.usize);
+                    try f.renderType(writer, .usize);
                     try writer.writeByte(')');
                 }
                 try f.object.dg.renderValue(writer, (try f.air.value(item, pt)).?, .Other);
@@ -5604,8 +5604,8 @@ fn airIsNull(
     const operand = try f.resolveInst(un_op);
     try reap(f, inst, &.{un_op});
 
-    const local = try f.allocLocal(inst, Type.bool);
-    const a = try Assignment.start(f, writer, CType.bool);
+    const local = try f.allocLocal(inst, .bool);
+    const a = try Assignment.start(f, writer, .bool);
     try f.writeCValue(writer, local, .Other);
     try a.assign(f, writer);
 
@@ -5750,7 +5750,7 @@ fn fieldLocation(
 } {
     const zcu = pt.zcu;
     const ip = &zcu.intern_pool;
-    const container_ty = Type.fromInterned(ip.indexToKey(container_ptr_ty.toIntern()).ptr_type.child);
+    const container_ty: Type = .fromInterned(ip.indexToKey(container_ptr_ty.toIntern()).ptr_type.child);
     switch (ip.indexToKey(container_ty.toIntern())) {
         .struct_type => {
             const loaded_struct = ip.loadStructType(container_ty.toIntern());
@@ -5781,7 +5781,7 @@ fn fieldLocation(
             const loaded_union = ip.loadUnionType(container_ty.toIntern());
             switch (loaded_union.flagsUnordered(ip).layout) {
                 .auto, .@"extern" => {
-                    const field_ty = Type.fromInterned(loaded_union.field_types.get(ip)[field_index]);
+                    const field_ty: Type = .fromInterned(loaded_union.field_types.get(ip)[field_index]);
                     if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu))
                         return if (loaded_union.hasTag(ip) and !container_ty.unionHasAllZeroBitFieldTypes(zcu))
                             .{ .field = .{ .identifier = "payload" } }
@@ -5850,7 +5850,7 @@ fn airFieldParentPtr(f: *Function, inst: Air.Inst.Index) !CValue {
     switch (fieldLocation(container_ptr_ty, field_ptr_ty, extra.field_index, pt)) {
         .begin => try f.writeCValue(writer, field_ptr_val, .Initializer),
         .field => |field| {
-            const u8_ptr_ty = try pt.adjustPtrTypeChild(field_ptr_ty, Type.u8);
+            const u8_ptr_ty = try pt.adjustPtrTypeChild(field_ptr_ty, .u8);
 
             try writer.writeAll("((");
             try f.renderType(writer, u8_ptr_ty);
@@ -5863,14 +5863,14 @@ fn airFieldParentPtr(f: *Function, inst: Air.Inst.Index) !CValue {
             try writer.writeAll("))");
         },
         .byte_offset => |byte_offset| {
-            const u8_ptr_ty = try pt.adjustPtrTypeChild(field_ptr_ty, Type.u8);
+            const u8_ptr_ty = try pt.adjustPtrTypeChild(field_ptr_ty, .u8);
 
             try writer.writeAll("((");
             try f.renderType(writer, u8_ptr_ty);
             try writer.writeByte(')');
             try f.writeCValue(writer, field_ptr_val, .Other);
             try writer.print(" - {})", .{
-                try f.fmtIntLiteral(try pt.intValue(Type.usize, byte_offset)),
+                try f.fmtIntLiteral(try pt.intValue(.usize, byte_offset)),
             });
         },
     }
@@ -5908,14 +5908,14 @@ fn fieldPtr(
             try f.writeCValueDerefMember(writer, container_ptr_val, field);
         },
         .byte_offset => |byte_offset| {
-            const u8_ptr_ty = try pt.adjustPtrTypeChild(field_ptr_ty, Type.u8);
+            const u8_ptr_ty = try pt.adjustPtrTypeChild(field_ptr_ty, .u8);
 
             try writer.writeAll("((");
             try f.renderType(writer, u8_ptr_ty);
             try writer.writeByte(')');
             try f.writeCValue(writer, container_ptr_val, .Other);
             try writer.print(" + {})", .{
-                try f.fmtIntLiteral(try pt.intValue(Type.usize, byte_offset)),
+                try f.fmtIntLiteral(try pt.intValue(.usize, byte_offset)),
             });
         },
     }
@@ -6158,7 +6158,7 @@ fn airWrapOptional(f: *Function, inst: Air.Inst.Index) !CValue {
                 const writer = f.object.writer();
                 const local = try f.allocLocal(inst, inst_ty);
                 {
-                    const a = try Assignment.start(f, writer, CType.bool);
+                    const a = try Assignment.start(f, writer, .bool);
                     try f.writeCValueMember(writer, local, .{ .identifier = "is_null" });
                     try a.assign(f, writer);
                     try writer.writeAll("false");
@@ -6322,12 +6322,12 @@ fn airIsErr(f: *Function, inst: Air.Inst.Index, is_ptr: bool, operator: []const
     const operand = try f.resolveInst(un_op);
     try reap(f, inst, &.{un_op});
     const operand_ty = f.typeOf(un_op);
-    const local = try f.allocLocal(inst, Type.bool);
+    const local = try f.allocLocal(inst, .bool);
     const err_union_ty = if (is_ptr) operand_ty.childType(zcu) else operand_ty;
     const payload_ty = err_union_ty.errorUnionPayload(zcu);
     const error_ty = err_union_ty.errorUnionSet(zcu);
 
-    const a = try Assignment.start(f, writer, CType.bool);
+    const a = try Assignment.start(f, writer, .bool);
     try f.writeCValue(writer, local, .Other);
     try a.assign(f, writer);
     const err_int_ty = try pt.errorIntType();
@@ -6385,17 +6385,17 @@ fn airArrayToSlice(f: *Function, inst: Air.Inst.Index) !CValue {
             if (operand_child_ctype.info(ctype_pool) == .array) {
                 try writer.writeByte('&');
                 try f.writeCValueDeref(writer, operand);
-                try writer.print("[{}]", .{try f.fmtIntLiteral(try pt.intValue(Type.usize, 0))});
+                try writer.print("[{}]", .{try f.fmtIntLiteral(try pt.intValue(.usize, 0))});
             } else try f.writeCValue(writer, operand, .Initializer);
         }
         try a.end(f, writer);
     }
     {
-        const a = try Assignment.start(f, writer, CType.usize);
+        const a = try Assignment.start(f, writer, .usize);
         try f.writeCValueMember(writer, local, .{ .identifier = "len" });
         try a.assign(f, writer);
         try writer.print("{}", .{
-            try f.fmtIntLiteral(try pt.intValue(Type.usize, array_ty.arrayLen(zcu))),
+            try f.fmtIntLiteral(try pt.intValue(.usize, array_ty.arrayLen(zcu))),
         });
         try a.end(f, writer);
     }
@@ -6627,7 +6627,7 @@ fn airCmpBuiltinCall(
     try writer.writeByte(')');
     if (!ref_ret) try writer.print("{s}{}", .{
         compareOperatorC(operator),
-        try f.fmtIntLiteral(try pt.intValue(Type.i32, 0)),
+        try f.fmtIntLiteral(try pt.intValue(.i32, 0)),
     });
     try writer.writeAll(";\n");
     try v.end(f, inst, writer);
@@ -6707,7 +6707,7 @@ fn airCmpxchg(f: *Function, inst: Air.Inst.Index, flavor: [*:0]const u8) !CValue
             try a.end(f, writer);
         }
         {
-            const a = try Assignment.start(f, writer, CType.bool);
+            const a = try Assignment.start(f, writer, .bool);
             try f.writeCValueMember(writer, local, .{ .identifier = "is_null" });
             try a.assign(f, writer);
             try writer.print("zig_cmpxchg_{s}((zig_atomic(", .{flavor});
@@ -6935,12 +6935,12 @@ fn airMemset(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue {
             },
         });
 
-        const index = try f.allocLocal(inst, Type.usize);
+        const index = try f.allocLocal(inst, .usize);
 
         try writer.writeAll("for (");
         try f.writeCValue(writer, index, .Other);
         try writer.writeAll(" = ");
-        try f.object.dg.renderValue(writer, try pt.intValue(Type.usize, 0), .Initializer);
+        try f.object.dg.renderValue(writer, try pt.intValue(.usize, 0), .Initializer);
         try writer.writeAll("; ");
         try f.writeCValue(writer, index, .Other);
         try writer.writeAll(" != ");
@@ -6976,7 +6976,7 @@ fn airMemset(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue {
         return .none;
     }
 
-    const bitcasted = try bitcast(f, Type.u8, value, elem_ty);
+    const bitcasted = try bitcast(f, .u8, value, elem_ty);
 
     try writer.writeAll("memset(");
     switch (dest_ty.ptrSize(zcu)) {
@@ -7038,7 +7038,7 @@ fn writeArrayLen(f: *Function, writer: ArrayListWriter, dest_ptr: CValue, dest_t
     const zcu = pt.zcu;
     switch (dest_ty.ptrSize(zcu)) {
         .one => try writer.print("{}", .{
-            try f.fmtIntLiteral(try pt.intValue(Type.usize, dest_ty.childType(zcu).arrayLen(zcu))),
+            try f.fmtIntLiteral(try pt.intValue(.usize, dest_ty.childType(zcu).arrayLen(zcu))),
         }),
         .many, .c => unreachable,
         .slice => try f.writeCValueMember(writer, dest_ptr, .{ .identifier = "len" }),
@@ -7200,11 +7200,11 @@ fn airShuffle(f: *Function, inst: Air.Inst.Index) !CValue {
     for (0..extra.mask_len) |index| {
         try f.writeCValue(writer, local, .Other);
         try writer.writeByte('[');
-        try f.object.dg.renderValue(writer, try pt.intValue(Type.usize, index), .Other);
+        try f.object.dg.renderValue(writer, try pt.intValue(.usize, index), .Other);
         try writer.writeAll("] = ");
 
         const mask_elem = (try mask.elemValue(pt, index)).toSignedInt(zcu);
-        const src_val = try pt.intValue(Type.usize, @as(u64, @intCast(mask_elem ^ mask_elem >> 63)));
+        const src_val = try pt.intValue(.usize, @as(u64, @intCast(mask_elem ^ mask_elem >> 63)));
 
         try f.writeCValue(writer, if (mask_elem >= 0) lhs else rhs, .Other);
         try writer.writeByte('[');
@@ -7377,7 +7377,7 @@ fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue {
     switch (ip.indexToKey(inst_ty.toIntern())) {
         inline .array_type, .vector_type => |info, tag| {
             const a: Assignment = .{
-                .ctype = try f.ctypeFromType(Type.fromInterned(info.child), .complete),
+                .ctype = try f.ctypeFromType(.fromInterned(info.child), .complete),
             };
             for (resolved_elements, 0..) |element, i| {
                 try a.restart(f, writer);
@@ -7402,7 +7402,7 @@ fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue {
                 .auto, .@"extern" => {
                     var field_it = loaded_struct.iterateRuntimeOrder(ip);
                     while (field_it.next()) |field_index| {
-                        const field_ty = Type.fromInterned(loaded_struct.field_types.get(ip)[field_index]);
+                        const field_ty: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]);
                         if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
 
                         const a = try Assignment.start(f, writer, try f.ctypeFromType(field_ty, .complete));
@@ -7466,8 +7466,8 @@ fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue {
                             if (field_ty.isPtrAtRuntime(zcu)) {
                                 try writer.writeByte('(');
                                 try f.renderType(writer, switch (int_info.signedness) {
-                                    .unsigned => Type.usize,
-                                    .signed => Type.isize,
+                                    .unsigned => .usize,
+                                    .signed => .isize,
                                 });
                                 try writer.writeByte(')');
                             }
@@ -7501,7 +7501,7 @@ fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue {
         },
         .tuple_type => |tuple_info| for (0..tuple_info.types.len) |field_index| {
             if (tuple_info.values.get(ip)[field_index] != .none) continue;
-            const field_ty = Type.fromInterned(tuple_info.types.get(ip)[field_index]);
+            const field_ty: Type = .fromInterned(tuple_info.types.get(ip)[field_index]);
             if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
 
             const a = try Assignment.start(f, writer, try f.ctypeFromType(field_ty, .complete));
@@ -8141,13 +8141,13 @@ fn formatIntLiteral(
     } = switch (data.ctype.info(ctype_pool)) {
         .basic => |basic_info| switch (basic_info) {
             else => .{
-                .ctype = CType.void,
+                .ctype = .void,
                 .count = 1,
                 .endian = .little,
                 .homogeneous = true,
             },
             .zig_u128, .zig_i128 => .{
-                .ctype = CType.u64,
+                .ctype = .u64,
                 .count = 2,
                 .endian = .big,
                 .homogeneous = false,
@@ -8253,7 +8253,7 @@ fn formatIntLiteral(
                 .int_info = c_limb_int_info,
                 .kind = data.kind,
                 .ctype = c_limb_ctype,
-                .val = try pt.intValue_big(Type.comptime_int, c_limb_mut.toConst()),
+                .val = try pt.intValue_big(.comptime_int, c_limb_mut.toConst()),
             }, fmt, options, writer);
         }
     }
@@ -8330,15 +8330,15 @@ const Vectorize = struct {
         const pt = f.object.dg.pt;
         const zcu = pt.zcu;
         return if (ty.zigTypeTag(zcu) == .vector) index: {
-            const local = try f.allocLocal(inst, Type.usize);
+            const local = try f.allocLocal(inst, .usize);
 
             try writer.writeAll("for (");
             try f.writeCValue(writer, local, .Other);
-            try writer.print(" = {d}; ", .{try f.fmtIntLiteral(try pt.intValue(Type.usize, 0))});
+            try writer.print(" = {d}; ", .{try f.fmtIntLiteral(try pt.intValue(.usize, 0))});
             try f.writeCValue(writer, local, .Other);
-            try writer.print(" < {d}; ", .{try f.fmtIntLiteral(try pt.intValue(Type.usize, ty.vectorLen(zcu)))});
+            try writer.print(" < {d}; ", .{try f.fmtIntLiteral(try pt.intValue(.usize, ty.vectorLen(zcu)))});
             try f.writeCValue(writer, local, .Other);
-            try writer.print(" += {d}) {{\n", .{try f.fmtIntLiteral(try pt.intValue(Type.usize, 1))});
+            try writer.print(" += {d}) {{\n", .{try f.fmtIntLiteral(try pt.intValue(.usize, 1))});
             f.object.indent_writer.pushIndent();
 
             break :index .{ .index = local };
src/Air.zig
@@ -984,6 +984,12 @@ pub const Inst = struct {
         single_const_pointer_to_comptime_int_type = @intFromEnum(InternPool.Index.single_const_pointer_to_comptime_int_type),
         slice_const_u8_type = @intFromEnum(InternPool.Index.slice_const_u8_type),
         slice_const_u8_sentinel_0_type = @intFromEnum(InternPool.Index.slice_const_u8_sentinel_0_type),
+        vector_4_f16_type = @intFromEnum(InternPool.Index.vector_4_f16_type),
+        vector_8_f16_type = @intFromEnum(InternPool.Index.vector_8_f16_type),
+        vector_4_f32_type = @intFromEnum(InternPool.Index.vector_4_f32_type),
+        vector_8_f32_type = @intFromEnum(InternPool.Index.vector_8_f32_type),
+        vector_2_f64_type = @intFromEnum(InternPool.Index.vector_2_f64_type),
+        vector_4_f64_type = @intFromEnum(InternPool.Index.vector_4_f64_type),
         optional_noreturn_type = @intFromEnum(InternPool.Index.optional_noreturn_type),
         anyerror_void_error_union_type = @intFromEnum(InternPool.Index.anyerror_void_error_union_type),
         adhoc_inferred_error_set_type = @intFromEnum(InternPool.Index.adhoc_inferred_error_set_type),
src/InternPool.zig
@@ -4559,12 +4559,21 @@ pub const Index = enum(u32) {
     null_type,
     undefined_type,
     enum_literal_type,
+
     manyptr_u8_type,
     manyptr_const_u8_type,
     manyptr_const_u8_sentinel_0_type,
     single_const_pointer_to_comptime_int_type,
     slice_const_u8_type,
     slice_const_u8_sentinel_0_type,
+
+    vector_4_f16_type,
+    vector_8_f16_type,
+    vector_4_f32_type,
+    vector_8_f32_type,
+    vector_2_f64_type,
+    vector_4_f64_type,
+
     optional_noreturn_type,
     anyerror_void_error_union_type,
     /// Used for the inferred error set of inline/comptime function calls.
@@ -5055,6 +5064,19 @@ pub const static_keys = [_]Key{
         },
     } },
 
+    // @Vector(4, f16)
+    .{ .vector_type = .{ .len = 4, .child = .f16_type } },
+    // @Vector(8, f16)
+    .{ .vector_type = .{ .len = 8, .child = .f16_type } },
+    // @Vector(4, f32)
+    .{ .vector_type = .{ .len = 4, .child = .f32_type } },
+    // @Vector(8, f32)
+    .{ .vector_type = .{ .len = 8, .child = .f32_type } },
+    // @Vector(2, f64)
+    .{ .vector_type = .{ .len = 2, .child = .f64_type } },
+    // @Vector(4, f64)
+    .{ .vector_type = .{ .len = 4, .child = .f64_type } },
+
     // ?noreturn
     .{ .opt_type = .noreturn_type },
 
@@ -11681,6 +11703,12 @@ pub fn typeOf(ip: *const InternPool, index: Index) Index {
         .single_const_pointer_to_comptime_int_type,
         .slice_const_u8_type,
         .slice_const_u8_sentinel_0_type,
+        .vector_4_f16_type,
+        .vector_8_f16_type,
+        .vector_4_f32_type,
+        .vector_8_f32_type,
+        .vector_2_f64_type,
+        .vector_4_f64_type,
         .optional_noreturn_type,
         .anyerror_void_error_union_type,
         .adhoc_inferred_error_set_type,
@@ -11998,6 +12026,14 @@ pub fn zigTypeTag(ip: *const InternPool, index: Index) std.builtin.TypeId {
         .slice_const_u8_sentinel_0_type,
         => .pointer,
 
+        .vector_4_f16_type,
+        .vector_8_f16_type,
+        .vector_4_f32_type,
+        .vector_8_f32_type,
+        .vector_2_f64_type,
+        .vector_4_f64_type,
+        => .vector,
+
         .optional_noreturn_type => .optional,
         .anyerror_void_error_union_type => .error_union,
         .empty_tuple_type => .@"struct",
src/Sema.zig
@@ -36611,6 +36611,12 @@ pub fn typeHasOnePossibleValue(sema: *Sema, ty: Type) CompileError!?Value {
         .single_const_pointer_to_comptime_int_type,
         .slice_const_u8_type,
         .slice_const_u8_sentinel_0_type,
+        .vector_4_f16_type,
+        .vector_8_f16_type,
+        .vector_4_f32_type,
+        .vector_8_f32_type,
+        .vector_2_f64_type,
+        .vector_4_f64_type,
         .anyerror_void_error_union_type,
         => null,
         .void_type => Value.void,
src/Type.zig
@@ -4174,7 +4174,15 @@ pub const single_const_pointer_to_comptime_int: Type = .{
     .ip_index = .single_const_pointer_to_comptime_int_type,
 };
 pub const slice_const_u8_sentinel_0: Type = .{ .ip_index = .slice_const_u8_sentinel_0_type };
-pub const empty_tuple_type: Type = .{ .ip_index = .empty_tuple_type };
+
+pub const vector_4_f16: Type = .{ .ip_index = .vector_4_f16_type };
+pub const vector_8_f16: Type = .{ .ip_index = .vector_8_f16_type };
+pub const vector_4_f32: Type = .{ .ip_index = .vector_4_f32_type };
+pub const vector_8_f32: Type = .{ .ip_index = .vector_8_f32_type };
+pub const vector_2_f64: Type = .{ .ip_index = .vector_2_f64_type };
+pub const vector_4_f64: Type = .{ .ip_index = .vector_4_f64_type };
+
+pub const empty_tuple: Type = .{ .ip_index = .empty_tuple_type };
 
 pub const generic_poison: Type = .{ .ip_index = .generic_poison_type };
 
test/behavior/x86_64/build.zig
@@ -32,6 +32,7 @@ pub fn build(b: *std.Build) void {
             .cpu_features_sub = std.Target.x86.featureSet(&.{
                 .cmov,
                 //.sse,
+                .sse2,
             }),
         },
         //.{
test/behavior/x86_64/math.zig
@@ -22,16 +22,20 @@ inline fn sign(rhs: anytype) switch (@typeInfo(@TypeOf(rhs))) {
     .vector => |vector| @Vector(vector.len, bool),
 } {
     switch (@typeInfo(@TypeOf(rhs))) {
-        else => return @as(@Type(.{ .int = .{
-            .signedness = .signed,
-            .bits = @bitSizeOf(@TypeOf(rhs)),
-        } }), @bitCast(rhs)) < 0,
+        else => {
+            const I = @Type(.{ .int = .{
+                .signedness = .unsigned,
+                .bits = @bitSizeOf(@TypeOf(rhs)),
+            } });
+            return @as(I, @bitCast(rhs)) & @as(I, 1) << (@bitSizeOf(I) - 1) != 0;
+        },
         .vector => |vector| {
-            const V = @Vector(vector.len, @Type(.{ .int = .{
-                .signedness = .signed,
+            const I = @Type(.{ .int = .{
+                .signedness = .unsigned,
                 .bits = @bitSizeOf(vector.child),
-            } }));
-            return @as(V, @bitCast(rhs)) < @as(V, @splat(0));
+            } });
+            const V = @Vector(vector.len, I);
+            return @as(V, @bitCast(rhs)) & @as(V, @splat(@as(I, 1) << (@bitSizeOf(I) - 1))) != @as(V, @splat(0));
         },
     }
 }
@@ -40,7 +44,7 @@ inline fn boolAnd(lhs: anytype, rhs: @TypeOf(lhs)) @TypeOf(lhs) {
         .bool => return lhs and rhs,
         .vector => |vector| switch (vector.child) {
             bool => {
-                const Bits = @Vector(vector.len, u1);
+                const Bits = @Type(.{ .int = .{ .signedness = .unsigned, .bits = vector.len } });
                 const lhs_bits: Bits = @bitCast(lhs);
                 const rhs_bits: Bits = @bitCast(rhs);
                 return @bitCast(lhs_bits & rhs_bits);
@@ -56,7 +60,7 @@ inline fn boolOr(lhs: anytype, rhs: @TypeOf(lhs)) @TypeOf(lhs) {
         .bool => return lhs or rhs,
         .vector => |vector| switch (vector.child) {
             bool => {
-                const Bits = @Vector(vector.len, u1);
+                const Bits = @Type(.{ .int = .{ .signedness = .unsigned, .bits = vector.len } });
                 const lhs_bits: Bits = @bitCast(lhs);
                 const rhs_bits: Bits = @bitCast(rhs);
                 return @bitCast(lhs_bits | rhs_bits);
@@ -1179,6 +1183,145 @@ fn Unary(comptime op: anytype) type {
             try testArgs(@Vector(3, i1025), .{ -1 << 1024, -1, 0 });
             try testArgs(@Vector(3, u1025), .{ 0, 1, 1 << 1024 });
         }
+        fn testFloatVectorTypes() !void {
+            try testArgs(@Vector(1, f16), .{
+                -0x1.17cp-12,
+            });
+            try testArgs(@Vector(2, f16), .{
+                0x1.47cp9, 0x1.3acp9,
+            });
+            try testArgs(@Vector(4, f16), .{
+                0x1.ab4p0, -0x1.7fcp-7, -0x1.1cp0, -0x1.f14p12,
+            });
+            try testArgs(@Vector(8, f16), .{
+                -0x1.8d8p8, 0x1.83p10, -0x1.5ap-1, -0x1.d78p13, -0x1.608p12, 0x1.e8p-9, -0x1.688p-10, -0x1.738p9,
+            });
+            try testArgs(@Vector(16, f16), .{
+                0x1.da8p-1, -0x1.ed4p-10, -0x1.dc8p1,  0x1.b78p-14, nan(f16),    0x1.9d8p8,   nan(f16),     0x1.d5p13,
+                -0x1.2dp13, 0x1.6c4p12,   0x1.a9cp-11, -0x1.0ecp8,  0x0.4ccp-14, -0x1.0a8p-6, -0x1.5bcp-14, 0x1.6d8p-9,
+            });
+            try testArgs(@Vector(32, f16), .{
+                0x1.d5cp-6,  -0x1.a98p5,  0x1.49cp5,   -0x1.e4p-1,  -0x1.21p-13, -0x1.c94p-1, -0x1.adcp-5, -0x1.524p-1,
+                -0x1.0d8p-3, -0x1.5c4p-2, 0x1.f84p-2,  0x1.664p1,   -0x1.f64p13, -0x1.bf4p4,  -0x1.4b8p0,  -0x0.f64p-14,
+                -0x1.3f8p1,  0x1.098p2,   -0x1.a44p8,  0x1.048p13,  0x1.fd4p-11, 0x1.18p-9,   -0x1.504p2,  0x1.d04p7,
+                -nan(f16),   0x1.a94p2,   0x0.5e8p-14, -0x1.7acp-7, 0x1.4c8p-3,  0x1.518p-4,  nan(f16),    0x1.8f8p10,
+            });
+            try testArgs(@Vector(64, f16), .{
+                -0x1.c2p2,   0x0.2fcp-14,  0x1.de8p0,    -0x1.714p2,   0x1.f9p-7,    -0x1.11cp-13, -0x1.558p10, -0x1.2acp-7,
+                0x1.348p14,  0x1.2dcp7,    -0x1.8acp-12, -0x1.2cp2,    0x1.868p1,    -0x1.1f8p-14, 0x1.638p7,   -0x1.734p-5,
+                0x0.b98p-14, -0x1.7f4p-12, -0x1.38cp15,  0x1.50cp15,   0x1.91cp8,    0x1.cb4p-1,   0x1.fc4p-13, 0x1.9a4p0,
+                0x1.18p-4,   0x1.60cp10,   0x1.6fp-12,   0x1.b48p6,    0x1.37cp-11,  0x1.424p7,    0x1.44cp13,  0x1.aep5,
+                0x1.968p14,  0x1.e8p13,    -0x1.bp2,     -0x1.644p5,   0x1.de4p-8,   -0x1.5b4p-14, -0x1.4ap1,   -0x1.868p9,
+                -0x1.d14p0,  0x1.d7cp15,   0x1.3c8p14,   0x1.2ccp-14,  -0x1.ee4p8,   0x1.49p-3,    0x1.35cp12,  0x1.d34p6,
+                0x1.7acp3,   -0x1.fa4p2,   0x1.7b4p13,   -0x1.cf4p-12, -0x1.ebcp-10, -0x1.5p-3,    0x1.4bp-6,   0x1.83p12,
+                -0x1.f9cp-8, -0x1.43p-8,   -0x1.99p-1,   -0x1.dacp3,   -0x1.728p-4,  -0x1.03cp4,   0x1.604p-2,  -0x1.0ep13,
+            });
+
+            try testArgs(@Vector(1, f32), .{
+                -0x1.17cp-12,
+            });
+            try testArgs(@Vector(2, f32), .{
+                -0x1.a3123ap90, -0x1.4a2ec6p-54,
+            });
+            try testArgs(@Vector(4, f32), .{
+                -0x1.8a41p77, -0x1.7c54e2p-61, -0x1.498556p-41, 0x1.d77c22p-20,
+            });
+            try testArgs(@Vector(8, f32), .{
+                0x1.943da4p-86, 0x1.528792p95,  -0x1.9c9bfap-26, -0x1.8df936p-90,
+                -0x1.6a70cep56, 0x1.626638p-48, 0x1.7bb2bap-57,  -0x1.ac5104p94,
+            });
+            try testArgs(@Vector(16, f32), .{
+                0x1.157044p115, -0x1.416c04p-111, 0x1.a8f164p-104, 0x1.9b6678p84,
+                -0x1.9d065cp9,  -0x1.e8c4b4p126,  -0x1.ddb968p84,  -0x1.fec8c8p74,
+                0x1.64ffb2p59,  0x1.548922p20,    0x1.7270fcp22,   -0x1.abac68p33,
+                0x1.faabfp33,   -0x1.8aee82p55,   0x1.1bf8fp75,    0x1.33c46ap-66,
+            });
+            try testArgs(@Vector(32, f32), .{
+                -0x1.039b68p37,   -0x1.34de4ap-74, -0x1.05d78ap-76, -0x1.be0f5ap-47,
+                0x1.032204p-38,   0x1.ef8e2ap-78,  -0x1.b013ecp-80, 0x1.71fe4cp99,
+                0x1.abdadap-14,   0x1.56a9a8p-48,  -0x1.8bbd7ep9,   0x1.edd308p-72,
+                -0x1.92fafcp-121, -0x1.50812p19,   0x1.f4ddc4p28,   -0x1.6f0b12p-50,
+                -0x1.12ab02p127,  0x1.24df48p21,   -0x1.993c3p-14,  -0x1.4cc476p-112,
+                0x1.13d9a8p-40,   0x1.a6e652p-9,   -0x1.9c730cp-21, -0x1.a75aaap-70,
+                -0x1.39e632p-111, 0x1.8e8da8p-45,  0x1.b5652cp31,   0x1.258366p44,
+                0x1.d473aap92,    -0x1.951b64p9,   0x1.542edp15,    -0x0.f6222ap-126,
+            });
+
+            try testArgs(@Vector(1, f64), .{
+                -0x1.0114613df6f97p816,
+            });
+            try testArgs(@Vector(2, f64), .{
+                -0x1.8404dad72003cp720, -0x1.6b14b40bcf3b7p-176,
+            });
+            try testArgs(@Vector(4, f64), .{
+                -0x1.04e1acbfddd9cp681, -0x1.ed553cc056da7p-749,
+                0x1.3d3f703a0c893p-905, 0x1.0b35633fa78fp691,
+            });
+            try testArgs(@Vector(8, f64), .{
+                -0x1.901a2a60f0562p-301, -0x1.2516175ad61ecp-447,
+                0x1.e7b12124846bfp564,   0x1.9291384bd7259p209,
+                -0x1.a7bf62f803c98p900,  0x1.4e2e26257bb3p987,
+                -0x1.413ca9a32d894p811,  0x1.61b1dd9432e95p479,
+            });
+            try testArgs(@Vector(16, f64), .{
+                -0x1.8fc7286d95f54p-235,  -0x1.796a7ea8372b6p-837,
+                -0x1.8c0f930539acbp-98,   -0x1.ec80dfbf0b931p-430,
+                -0x1.e3d80c640652fp-1019, 0x1.8241238fb542fp161,
+                -0x1.e1f1a79d50263p137,   -0x1.9ac5cb2771c28p-791,
+                0x1.4d8f00fe881e7p-401,   -0x1.87fbd7bfd99d7p346,
+                -0x1.a8a7cc575335ep1017,  0x1.37bb88dc3fd8bp-355,
+                0x1.9d53d346c0e65p929,    -0x1.bbae3d0229c34p289,
+                -0x1.cb8ef994d5ce5p25,    0x1.ba20af512616ap50,
+            });
+
+            try testArgs(@Vector(1, f80), .{
+                -0x1.a2e9410a7dfedabp-2324,
+            });
+            try testArgs(@Vector(2, f80), .{
+                -0x1.a2e9410a7dfedabp-2324,
+                0x1.2b17da3b9746885p-8665,
+            });
+            try testArgs(@Vector(4, f80), .{
+                -0x1.c488fedb7ab646cep-13007,
+                0x1.e914deaccaa50016p2073,
+                -0x1.d1c7ae8ec3c9df86p10642,
+                -0x1.2da1658f337fa01p9893,
+            });
+            try testArgs(@Vector(8, f80), .{
+                -0x1.bed8a74c43750656p890,
+                -0x1.7bf57f38004ac976p8481,
+                -0x1.9cdc10ac0657d328p7884,
+                0x1.c86f61883da149fp12293,
+                -0x1.528d6957df6bfdd8p14125,
+                -0x1.5ebb4006d0243bfep14530,
+                -0x1.94b9b18636d12402p-1845,
+                -0x1.25439a6d68add188p5962,
+            });
+
+            try testArgs(@Vector(1, f128), .{
+                -0x1.d1e6fc3b1e66632e7b79051a47dap14300,
+            });
+            try testArgs(@Vector(2, f128), .{
+                0x1.84b3ac8ffe5893b2c6af8d68de9dp-83,
+                -0x1.438ca2c8a0d8e3ee9062d351c46ep-10235,
+            });
+            try testArgs(@Vector(4, f128), .{
+                0x1.04eb03882d4fd1b090e714d3e5ep806,
+                -0x1.4082b29f7c26e701764c915642ffp-6182,
+                -0x1.b6f1e8565e5040415110f18b519ap13383,
+                0x1.1c29f8c162cead9061c5797ea15ap11957,
+            });
+            try testArgs(@Vector(8, f128), .{
+                -0x1.53d7f00cd204d80e5ff5bb665773p11218,
+                -0x1.4daa1c81cffe28e8fa5cd703c287p2362,
+                -0x1.cc6a71c3ad4560871efdbd025cd7p-8116,
+                -0x1.87f8553cf8772fb6b78e7df3e3bap14523,
+                -0x1.14b6880f6678f86dfb543dde1c6ep2105,
+                0x1.9d2d4398414da9d857e76e8fd7ccp-13668,
+                0x1.a37f07af240ded458d103c022064p-1158,
+                0x1.425d53e6bd6070b847e5da1ed593p1394,
+            });
+        }
     };
 }
 
@@ -1601,6 +1744,7 @@ test abs {
     try Unary(abs).testIntTypes();
     try Unary(abs).testIntVectorTypes();
     try Unary(abs).testFloatTypes();
+    try Unary(abs).testFloatVectorTypes();
 }
 
 inline fn clz(comptime Type: type, rhs: Type) @TypeOf(@clz(rhs)) {
test/behavior/abs.zig
@@ -335,12 +335,12 @@ fn testAbsUnsignedIntVectors(comptime len: comptime_int) !void {
 
 test "@abs float vectors" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
 
     // https://github.com/ziglang/zig/issues/12827
     if (builtin.zig_backend == .stage2_llvm and
test/src/Debugger.zig
@@ -526,8 +526,9 @@ pub fn addTestsForTarget(db: *Debugger, target: Target) void {
                 \\        var null_u32: ?u32 = null;
                 \\        var maybe_u32: ?u32 = null;
                 \\        var nonnull_u32: ?u32 = 456;
+                \\        null_u32 = null_u32;
                 \\        maybe_u32 = 123;
-                \\        _ = .{ &null_u32, &nonnull_u32 };
+                \\        nonnull_u32 = nonnull_u32;
                 \\    }
                 \\}
                 \\
@@ -539,7 +540,7 @@ pub fn addTestsForTarget(db: *Debugger, target: Target) void {
         \\frame variable -- null_u32 maybe_u32 nonnull_u32
         \\breakpoint delete --force 1
         \\
-        \\breakpoint set --file optionals.zig --source-pattern-regexp '_ = \.{ &null_u32, &nonnull_u32 };'
+        \\breakpoint set --file optionals.zig --source-pattern-regexp 'nonnull_u32 = nonnull_u32;'
         \\process continue
         \\frame variable --show-types -- null_u32 maybe_u32 nonnull_u32
         \\breakpoint delete --force 2
@@ -1285,10 +1286,12 @@ pub fn addTestsForTarget(db: *Debugger, target: Target) void {
                 \\    mod1.m1cfi(r0pai ^ 8);
                 \\}
                 \\pub fn r0cf(r0ca: u32) void {
-                \\    _ = r0ca;
+                \\    var discard = r0ca;
+                \\    _ = &discard;
                 \\}
                 \\pub inline fn r0cfi(r0cai: u32) void {
-                \\    _ = r0cai;
+                \\    var discard = r0cai;
+                \\    _ = &discard;
                 \\}
                 \\pub fn main() void {
                 \\    root0.r0pf(12);
@@ -1331,10 +1334,12 @@ pub fn addTestsForTarget(db: *Debugger, target: Target) void {
                 \\    mod1.m1cfi(r1pai ^ 8);
                 \\}
                 \\pub fn r1cf(r1ca: u32) void {
-                \\    _ = r1ca;
+                \\    var discard = r1ca;
+                \\    _ = &discard;
                 \\}
                 \\pub inline fn r1cfi(r1cai: u32) void {
-                \\    _ = r1cai;
+                \\    var discard = r1cai;
+                \\    _ = &discard;
                 \\}
                 \\
                 ,
@@ -1368,10 +1373,12 @@ pub fn addTestsForTarget(db: *Debugger, target: Target) void {
                 \\    mod1.m1cfi(m0pai ^ 8);
                 \\}
                 \\pub fn m0cf(m0ca: u32) void {
-                \\    _ = m0ca;
+                \\    var discard = m0ca;
+                \\    _ = &discard;
                 \\}
                 \\pub inline fn m0cfi(m0cai: u32) void {
-                \\    _ = m0cai;
+                \\    var discard = m0cai;
+                \\    _ = &discard;
                 \\}
                 \\
                 ,
@@ -1404,10 +1411,12 @@ pub fn addTestsForTarget(db: *Debugger, target: Target) void {
                 \\    mod1.m1cfi(m1pai ^ 8);
                 \\}
                 \\pub fn m1cf(m1ca: u32) void {
-                \\    _ = m1ca;
+                \\    var discard = m1ca;
+                \\    _ = &discard;
                 \\}
                 \\pub inline fn m1cfi(m1cai: u32) void {
-                \\    _ = m1cai;
+                \\    var discard = m1cai;
+                \\    _ = &discard;
                 \\}
                 \\
                 ,
@@ -1416,13 +1425,13 @@ pub fn addTestsForTarget(db: *Debugger, target: Target) void {
         \\settings set frame-format 'frame #${frame.index}:{ ${module.file.basename}{\`${function.name-with-args}{${frame.no-debug}${function.pc-offset}}}}{ at ${line.file.basename}:${line.number}{:${line.column}}}{${function.is-optimized} [opt]}{${frame.is-artificial} [artificial]}\n'
         \\
         \\breakpoint set --file root0.zig --line 26
-        \\breakpoint set --file root0.zig --line 29
+        \\breakpoint set --file root0.zig --line 30
         \\breakpoint set --file root1.zig --line 26
-        \\breakpoint set --file root1.zig --line 29
+        \\breakpoint set --file root1.zig --line 30
         \\breakpoint set --file mod0.zig --line 26
-        \\breakpoint set --file mod0.zig --line 29
+        \\breakpoint set --file mod0.zig --line 30
         \\breakpoint set --file mod1.zig --line 26
-        \\breakpoint set --file mod1.zig --line 29
+        \\breakpoint set --file mod1.zig --line 30
         \\
         \\process launch
         \\thread backtrace --count 3
@@ -1563,259 +1572,259 @@ pub fn addTestsForTarget(db: *Debugger, target: Target) void {
         &.{
             \\  * frame #0: inline_call`root0.r0cf(r0ca=13) at root0.zig:26:5
             \\    frame #1: inline_call`root0.r0pf(r0pa=12) at root0.zig:6:15
-            \\    frame #2: inline_call`root0.main at root0.zig:32:15
+            \\    frame #2: inline_call`root0.main at root0.zig:34:15
             ,
-            \\  * frame #0: inline_call`root0.r0pf [inlined] r0cfi(r0cai=14) at root0.zig:29:5
+            \\  * frame #0: inline_call`root0.r0pf [inlined] r0cfi(r0cai=14) at root0.zig:30:5
             \\    frame #1: inline_call`root0.r0pf(r0pa=12) at root0.zig:7:16
-            \\    frame #2: inline_call`root0.main at root0.zig:32:15
+            \\    frame #2: inline_call`root0.main at root0.zig:34:15
             ,
             \\  * frame #0: inline_call`root1.r1cf(r1ca=15) at root1.zig:26:5
             \\    frame #1: inline_call`root0.r0pf(r0pa=12) at root0.zig:8:15
-            \\    frame #2: inline_call`root0.main at root0.zig:32:15
+            \\    frame #2: inline_call`root0.main at root0.zig:34:15
             ,
-            \\  * frame #0: inline_call`root0.r0pf [inlined] r1cfi(r1cai=8) at root1.zig:29:5
+            \\  * frame #0: inline_call`root0.r0pf [inlined] r1cfi(r1cai=8) at root1.zig:30:5
             \\    frame #1: inline_call`root0.r0pf(r0pa=12) at root0.zig:9:16
-            \\    frame #2: inline_call`root0.main at root0.zig:32:15
+            \\    frame #2: inline_call`root0.main at root0.zig:34:15
             ,
             \\  * frame #0: inline_call`mod0.m0cf(m0ca=9) at mod0.zig:26:5
             \\    frame #1: inline_call`root0.r0pf(r0pa=12) at root0.zig:10:14
-            \\    frame #2: inline_call`root0.main at root0.zig:32:15
+            \\    frame #2: inline_call`root0.main at root0.zig:34:15
             ,
-            \\  * frame #0: inline_call`root0.r0pf [inlined] m0cfi(m0cai=10) at mod0.zig:29:5
+            \\  * frame #0: inline_call`root0.r0pf [inlined] m0cfi(m0cai=10) at mod0.zig:30:5
             \\    frame #1: inline_call`root0.r0pf(r0pa=12) at root0.zig:11:15
-            \\    frame #2: inline_call`root0.main at root0.zig:32:15
+            \\    frame #2: inline_call`root0.main at root0.zig:34:15
             ,
             \\  * frame #0: inline_call`mod1.m1cf(m1ca=11) at mod1.zig:26:5
             \\    frame #1: inline_call`root0.r0pf(r0pa=12) at root0.zig:12:14
-            \\    frame #2: inline_call`root0.main at root0.zig:32:15
+            \\    frame #2: inline_call`root0.main at root0.zig:34:15
             ,
-            \\  * frame #0: inline_call`root0.r0pf [inlined] m1cfi(m1cai=4) at mod1.zig:29:5
+            \\  * frame #0: inline_call`root0.r0pf [inlined] m1cfi(m1cai=4) at mod1.zig:30:5
             \\    frame #1: inline_call`root0.r0pf(r0pa=12) at root0.zig:13:15
-            \\    frame #2: inline_call`root0.main at root0.zig:32:15
+            \\    frame #2: inline_call`root0.main at root0.zig:34:15
             ,
             \\  * frame #0: inline_call`root0.r0cf(r0ca=22) at root0.zig:26:5
             \\    frame #1: inline_call`root0.main [inlined] r0pfi(r0pai=23) at root0.zig:16:15
-            \\    frame #2: inline_call`root0.main at root0.zig:33:16
+            \\    frame #2: inline_call`root0.main at root0.zig:35:16
             ,
-            \\  * frame #0: inline_call`root0.main [inlined] r0cfi(r0cai=21) at root0.zig:29:5
+            \\  * frame #0: inline_call`root0.main [inlined] r0cfi(r0cai=21) at root0.zig:30:5
             \\    frame #1: inline_call`root0.main [inlined] r0pfi(r0pai=23) at root0.zig:17:16
-            \\    frame #2: inline_call`root0.main at root0.zig:33:16
+            \\    frame #2: inline_call`root0.main at root0.zig:35:16
             ,
             \\  * frame #0: inline_call`root1.r1cf(r1ca=20) at root1.zig:26:5
             \\    frame #1: inline_call`root0.main [inlined] r0pfi(r0pai=23) at root0.zig:18:15
-            \\    frame #2: inline_call`root0.main at root0.zig:33:16
+            \\    frame #2: inline_call`root0.main at root0.zig:35:16
             ,
-            \\  * frame #0: inline_call`root0.main [inlined] r1cfi(r1cai=19) at root1.zig:29:5
+            \\  * frame #0: inline_call`root0.main [inlined] r1cfi(r1cai=19) at root1.zig:30:5
             \\    frame #1: inline_call`root0.main [inlined] r0pfi(r0pai=23) at root0.zig:19:16
-            \\    frame #2: inline_call`root0.main at root0.zig:33:16
+            \\    frame #2: inline_call`root0.main at root0.zig:35:16
             ,
             \\  * frame #0: inline_call`mod0.m0cf(m0ca=18) at mod0.zig:26:5
             \\    frame #1: inline_call`root0.main [inlined] r0pfi(r0pai=23) at root0.zig:20:14
-            \\    frame #2: inline_call`root0.main at root0.zig:33:16
+            \\    frame #2: inline_call`root0.main at root0.zig:35:16
             ,
-            \\  * frame #0: inline_call`root0.main [inlined] m0cfi(m0cai=17) at mod0.zig:29:5
+            \\  * frame #0: inline_call`root0.main [inlined] m0cfi(m0cai=17) at mod0.zig:30:5
             \\    frame #1: inline_call`root0.main [inlined] r0pfi(r0pai=23) at root0.zig:21:15
-            \\    frame #2: inline_call`root0.main at root0.zig:33:16
+            \\    frame #2: inline_call`root0.main at root0.zig:35:16
             ,
             \\  * frame #0: inline_call`mod1.m1cf(m1ca=16) at mod1.zig:26:5
             \\    frame #1: inline_call`root0.main [inlined] r0pfi(r0pai=23) at root0.zig:22:14
-            \\    frame #2: inline_call`root0.main at root0.zig:33:16
+            \\    frame #2: inline_call`root0.main at root0.zig:35:16
             ,
-            \\  * frame #0: inline_call`root0.main [inlined] m1cfi(m1cai=31) at mod1.zig:29:5
+            \\  * frame #0: inline_call`root0.main [inlined] m1cfi(m1cai=31) at mod1.zig:30:5
             \\    frame #1: inline_call`root0.main [inlined] r0pfi(r0pai=23) at root0.zig:23:15
-            \\    frame #2: inline_call`root0.main at root0.zig:33:16
+            \\    frame #2: inline_call`root0.main at root0.zig:35:16
             ,
             \\  * frame #0: inline_call`root0.r0cf(r0ca=35) at root0.zig:26:5
             \\    frame #1: inline_call`root1.r1pf(r1pa=34) at root1.zig:6:15
-            \\    frame #2: inline_call`root0.main at root0.zig:34:15
+            \\    frame #2: inline_call`root0.main at root0.zig:36:15
             ,
-            \\  * frame #0: inline_call`root1.r1pf [inlined] r0cfi(r0cai=32) at root0.zig:29:5
+            \\  * frame #0: inline_call`root1.r1pf [inlined] r0cfi(r0cai=32) at root0.zig:30:5
             \\    frame #1: inline_call`root1.r1pf(r1pa=34) at root1.zig:7:16
-            \\    frame #2: inline_call`root0.main at root0.zig:34:15
+            \\    frame #2: inline_call`root0.main at root0.zig:36:15
             ,
             \\  * frame #0: inline_call`root1.r1cf(r1ca=33) at root1.zig:26:5
             \\    frame #1: inline_call`root1.r1pf(r1pa=34) at root1.zig:8:15
-            \\    frame #2: inline_call`root0.main at root0.zig:34:15
+            \\    frame #2: inline_call`root0.main at root0.zig:36:15
             ,
-            \\  * frame #0: inline_call`root1.r1pf [inlined] r1cfi(r1cai=38) at root1.zig:29:5
+            \\  * frame #0: inline_call`root1.r1pf [inlined] r1cfi(r1cai=38) at root1.zig:30:5
             \\    frame #1: inline_call`root1.r1pf(r1pa=34) at root1.zig:9:16
-            \\    frame #2: inline_call`root0.main at root0.zig:34:15
+            \\    frame #2: inline_call`root0.main at root0.zig:36:15
             ,
             \\  * frame #0: inline_call`mod0.m0cf(m0ca=39) at mod0.zig:26:5
             \\    frame #1: inline_call`root1.r1pf(r1pa=34) at root1.zig:10:14
-            \\    frame #2: inline_call`root0.main at root0.zig:34:15
+            \\    frame #2: inline_call`root0.main at root0.zig:36:15
             ,
-            \\  * frame #0: inline_call`root1.r1pf [inlined] m0cfi(m0cai=36) at mod0.zig:29:5
+            \\  * frame #0: inline_call`root1.r1pf [inlined] m0cfi(m0cai=36) at mod0.zig:30:5
             \\    frame #1: inline_call`root1.r1pf(r1pa=34) at root1.zig:11:15
-            \\    frame #2: inline_call`root0.main at root0.zig:34:15
+            \\    frame #2: inline_call`root0.main at root0.zig:36:15
             ,
             \\  * frame #0: inline_call`mod1.m1cf(m1ca=37) at mod1.zig:26:5
             \\    frame #1: inline_call`root1.r1pf(r1pa=34) at root1.zig:12:14
-            \\    frame #2: inline_call`root0.main at root0.zig:34:15
+            \\    frame #2: inline_call`root0.main at root0.zig:36:15
             ,
-            \\  * frame #0: inline_call`root1.r1pf [inlined] m1cfi(m1cai=42) at mod1.zig:29:5
+            \\  * frame #0: inline_call`root1.r1pf [inlined] m1cfi(m1cai=42) at mod1.zig:30:5
             \\    frame #1: inline_call`root1.r1pf(r1pa=34) at root1.zig:13:15
-            \\    frame #2: inline_call`root0.main at root0.zig:34:15
+            \\    frame #2: inline_call`root0.main at root0.zig:36:15
             ,
             \\  * frame #0: inline_call`root0.r0cf(r0ca=44) at root0.zig:26:5
             \\    frame #1: inline_call`root0.main [inlined] r1pfi(r1pai=45) at root1.zig:16:15
-            \\    frame #2: inline_call`root0.main at root0.zig:35:16
+            \\    frame #2: inline_call`root0.main at root0.zig:37:16
             ,
-            \\  * frame #0: inline_call`root0.main [inlined] r0cfi(r0cai=47) at root0.zig:29:5
+            \\  * frame #0: inline_call`root0.main [inlined] r0cfi(r0cai=47) at root0.zig:30:5
             \\    frame #1: inline_call`root0.main [inlined] r1pfi(r1pai=45) at root1.zig:17:16
-            \\    frame #2: inline_call`root0.main at root0.zig:35:16
+            \\    frame #2: inline_call`root0.main at root0.zig:37:16
             ,
             \\  * frame #0: inline_call`root1.r1cf(r1ca=46) at root1.zig:26:5
             \\    frame #1: inline_call`root0.main [inlined] r1pfi(r1pai=45) at root1.zig:18:15
-            \\    frame #2: inline_call`root0.main at root0.zig:35:16
+            \\    frame #2: inline_call`root0.main at root0.zig:37:16
             ,
-            \\  * frame #0: inline_call`root0.main [inlined] r1cfi(r1cai=41) at root1.zig:29:5
+            \\  * frame #0: inline_call`root0.main [inlined] r1cfi(r1cai=41) at root1.zig:30:5
             \\    frame #1: inline_call`root0.main [inlined] r1pfi(r1pai=45) at root1.zig:19:16
-            \\    frame #2: inline_call`root0.main at root0.zig:35:16
+            \\    frame #2: inline_call`root0.main at root0.zig:37:16
             ,
             \\  * frame #0: inline_call`mod0.m0cf(m0ca=40) at mod0.zig:26:5
             \\    frame #1: inline_call`root0.main [inlined] r1pfi(r1pai=45) at root1.zig:20:14
-            \\    frame #2: inline_call`root0.main at root0.zig:35:16
+            \\    frame #2: inline_call`root0.main at root0.zig:37:16
             ,
-            \\  * frame #0: inline_call`root0.main [inlined] m0cfi(m0cai=43) at mod0.zig:29:5
+            \\  * frame #0: inline_call`root0.main [inlined] m0cfi(m0cai=43) at mod0.zig:30:5
             \\    frame #1: inline_call`root0.main [inlined] r1pfi(r1pai=45) at root1.zig:21:15
-            \\    frame #2: inline_call`root0.main at root0.zig:35:16
+            \\    frame #2: inline_call`root0.main at root0.zig:37:16
             ,
             \\  * frame #0: inline_call`mod1.m1cf(m1ca=42) at mod1.zig:26:5
             \\    frame #1: inline_call`root0.main [inlined] r1pfi(r1pai=45) at root1.zig:22:14
-            \\    frame #2: inline_call`root0.main at root0.zig:35:16
+            \\    frame #2: inline_call`root0.main at root0.zig:37:16
             ,
-            \\  * frame #0: inline_call`root0.main [inlined] m1cfi(m1cai=37) at mod1.zig:29:5
+            \\  * frame #0: inline_call`root0.main [inlined] m1cfi(m1cai=37) at mod1.zig:30:5
             \\    frame #1: inline_call`root0.main [inlined] r1pfi(r1pai=45) at root1.zig:23:15
-            \\    frame #2: inline_call`root0.main at root0.zig:35:16
+            \\    frame #2: inline_call`root0.main at root0.zig:37:16
             ,
             \\  * frame #0: inline_call`root0.r0cf(r0ca=57) at root0.zig:26:5
             \\    frame #1: inline_call`mod0.m0pf(m0pa=56) at mod0.zig:6:15
-            \\    frame #2: inline_call`root0.main at root0.zig:36:14
+            \\    frame #2: inline_call`root0.main at root0.zig:38:14
             ,
-            \\  * frame #0: inline_call`mod0.m0pf [inlined] r0cfi(r0cai=58) at root0.zig:29:5
+            \\  * frame #0: inline_call`mod0.m0pf [inlined] r0cfi(r0cai=58) at root0.zig:30:5
             \\    frame #1: inline_call`mod0.m0pf(m0pa=56) at mod0.zig:7:16
-            \\    frame #2: inline_call`root0.main at root0.zig:36:14
+            \\    frame #2: inline_call`root0.main at root0.zig:38:14
             ,
             \\  * frame #0: inline_call`root1.r1cf(r1ca=59) at root1.zig:26:5
             \\    frame #1: inline_call`mod0.m0pf(m0pa=56) at mod0.zig:8:15
-            \\    frame #2: inline_call`root0.main at root0.zig:36:14
+            \\    frame #2: inline_call`root0.main at root0.zig:38:14
             ,
-            \\  * frame #0: inline_call`mod0.m0pf [inlined] r1cfi(r1cai=60) at root1.zig:29:5
+            \\  * frame #0: inline_call`mod0.m0pf [inlined] r1cfi(r1cai=60) at root1.zig:30:5
             \\    frame #1: inline_call`mod0.m0pf(m0pa=56) at mod0.zig:9:16
-            \\    frame #2: inline_call`root0.main at root0.zig:36:14
+            \\    frame #2: inline_call`root0.main at root0.zig:38:14
             ,
             \\  * frame #0: inline_call`mod0.m0cf(m0ca=61) at mod0.zig:26:5
             \\    frame #1: inline_call`mod0.m0pf(m0pa=56) at mod0.zig:10:14
-            \\    frame #2: inline_call`root0.main at root0.zig:36:14
+            \\    frame #2: inline_call`root0.main at root0.zig:38:14
             ,
-            \\  * frame #0: inline_call`mod0.m0pf [inlined] m0cfi(m0cai=62) at mod0.zig:29:5
+            \\  * frame #0: inline_call`mod0.m0pf [inlined] m0cfi(m0cai=62) at mod0.zig:30:5
             \\    frame #1: inline_call`mod0.m0pf(m0pa=56) at mod0.zig:11:15
-            \\    frame #2: inline_call`root0.main at root0.zig:36:14
+            \\    frame #2: inline_call`root0.main at root0.zig:38:14
             ,
             \\  * frame #0: inline_call`mod1.m1cf(m1ca=63) at mod1.zig:26:5
             \\    frame #1: inline_call`mod0.m0pf(m0pa=56) at mod0.zig:12:14
-            \\    frame #2: inline_call`root0.main at root0.zig:36:14
+            \\    frame #2: inline_call`root0.main at root0.zig:38:14
             ,
-            \\  * frame #0: inline_call`mod0.m0pf [inlined] m1cfi(m1cai=48) at mod1.zig:29:5
+            \\  * frame #0: inline_call`mod0.m0pf [inlined] m1cfi(m1cai=48) at mod1.zig:30:5
             \\    frame #1: inline_call`mod0.m0pf(m0pa=56) at mod0.zig:13:15
-            \\    frame #2: inline_call`root0.main at root0.zig:36:14
+            \\    frame #2: inline_call`root0.main at root0.zig:38:14
             ,
             \\  * frame #0: inline_call`root0.r0cf(r0ca=66) at root0.zig:26:5
             \\    frame #1: inline_call`root0.main [inlined] m0pfi(m0pai=67) at mod0.zig:16:15
-            \\    frame #2: inline_call`root0.main at root0.zig:37:15
+            \\    frame #2: inline_call`root0.main at root0.zig:39:15
             ,
-            \\  * frame #0: inline_call`root0.main [inlined] r0cfi(r0cai=65) at root0.zig:29:5
+            \\  * frame #0: inline_call`root0.main [inlined] r0cfi(r0cai=65) at root0.zig:30:5
             \\    frame #1: inline_call`root0.main [inlined] m0pfi(m0pai=67) at mod0.zig:17:16
-            \\    frame #2: inline_call`root0.main at root0.zig:37:15
+            \\    frame #2: inline_call`root0.main at root0.zig:39:15
             ,
             \\  * frame #0: inline_call`root1.r1cf(r1ca=64) at root1.zig:26:5
             \\    frame #1: inline_call`root0.main [inlined] m0pfi(m0pai=67) at mod0.zig:18:15
-            \\    frame #2: inline_call`root0.main at root0.zig:37:15
+            \\    frame #2: inline_call`root0.main at root0.zig:39:15
             ,
-            \\  * frame #0: inline_call`root0.main [inlined] r1cfi(r1cai=71) at root1.zig:29:5
+            \\  * frame #0: inline_call`root0.main [inlined] r1cfi(r1cai=71) at root1.zig:30:5
             \\    frame #1: inline_call`root0.main [inlined] m0pfi(m0pai=67) at mod0.zig:19:16
-            \\    frame #2: inline_call`root0.main at root0.zig:37:15
+            \\    frame #2: inline_call`root0.main at root0.zig:39:15
             ,
             \\  * frame #0: inline_call`mod0.m0cf(m0ca=70) at mod0.zig:26:5
             \\    frame #1: inline_call`root0.main [inlined] m0pfi(m0pai=67) at mod0.zig:20:14
-            \\    frame #2: inline_call`root0.main at root0.zig:37:15
+            \\    frame #2: inline_call`root0.main at root0.zig:39:15
             ,
-            \\  * frame #0: inline_call`root0.main [inlined] m0cfi(m0cai=69) at mod0.zig:29:5
+            \\  * frame #0: inline_call`root0.main [inlined] m0cfi(m0cai=69) at mod0.zig:30:5
             \\    frame #1: inline_call`root0.main [inlined] m0pfi(m0pai=67) at mod0.zig:21:15
-            \\    frame #2: inline_call`root0.main at root0.zig:37:15
+            \\    frame #2: inline_call`root0.main at root0.zig:39:15
             ,
             \\  * frame #0: inline_call`mod1.m1cf(m1ca=68) at mod1.zig:26:5
             \\    frame #1: inline_call`root0.main [inlined] m0pfi(m0pai=67) at mod0.zig:22:14
-            \\    frame #2: inline_call`root0.main at root0.zig:37:15
+            \\    frame #2: inline_call`root0.main at root0.zig:39:15
             ,
-            \\  * frame #0: inline_call`root0.main [inlined] m1cfi(m1cai=75) at mod1.zig:29:5
+            \\  * frame #0: inline_call`root0.main [inlined] m1cfi(m1cai=75) at mod1.zig:30:5
             \\    frame #1: inline_call`root0.main [inlined] m0pfi(m0pai=67) at mod0.zig:23:15
-            \\    frame #2: inline_call`root0.main at root0.zig:37:15
+            \\    frame #2: inline_call`root0.main at root0.zig:39:15
             ,
             \\  * frame #0: inline_call`root0.r0cf(r0ca=79) at root0.zig:26:5
             \\    frame #1: inline_call`mod1.m1pf(m1pa=78) at mod1.zig:6:15
-            \\    frame #2: inline_call`root0.main at root0.zig:38:14
+            \\    frame #2: inline_call`root0.main at root0.zig:40:14
             ,
-            \\  * frame #0: inline_call`mod1.m1pf [inlined] r0cfi(r0cai=76) at root0.zig:29:5
+            \\  * frame #0: inline_call`mod1.m1pf [inlined] r0cfi(r0cai=76) at root0.zig:30:5
             \\    frame #1: inline_call`mod1.m1pf(m1pa=78) at mod1.zig:7:16
-            \\    frame #2: inline_call`root0.main at root0.zig:38:14
+            \\    frame #2: inline_call`root0.main at root0.zig:40:14
             ,
             \\  * frame #0: inline_call`root1.r1cf(r1ca=77) at root1.zig:26:5
             \\    frame #1: inline_call`mod1.m1pf(m1pa=78) at mod1.zig:8:15
-            \\    frame #2: inline_call`root0.main at root0.zig:38:14
+            \\    frame #2: inline_call`root0.main at root0.zig:40:14
             ,
-            \\  * frame #0: inline_call`mod1.m1pf [inlined] r1cfi(r1cai=74) at root1.zig:29:5
+            \\  * frame #0: inline_call`mod1.m1pf [inlined] r1cfi(r1cai=74) at root1.zig:30:5
             \\    frame #1: inline_call`mod1.m1pf(m1pa=78) at mod1.zig:9:16
-            \\    frame #2: inline_call`root0.main at root0.zig:38:14
+            \\    frame #2: inline_call`root0.main at root0.zig:40:14
             ,
             \\  * frame #0: inline_call`mod0.m0cf(m0ca=75) at mod0.zig:26:5
             \\    frame #1: inline_call`mod1.m1pf(m1pa=78) at mod1.zig:10:14
-            \\    frame #2: inline_call`root0.main at root0.zig:38:14
+            \\    frame #2: inline_call`root0.main at root0.zig:40:14
             ,
-            \\  * frame #0: inline_call`mod1.m1pf [inlined] m0cfi(m0cai=72) at mod0.zig:29:5
+            \\  * frame #0: inline_call`mod1.m1pf [inlined] m0cfi(m0cai=72) at mod0.zig:30:5
             \\    frame #1: inline_call`mod1.m1pf(m1pa=78) at mod1.zig:11:15
-            \\    frame #2: inline_call`root0.main at root0.zig:38:14
+            \\    frame #2: inline_call`root0.main at root0.zig:40:14
             ,
             \\  * frame #0: inline_call`mod1.m1cf(m1ca=73) at mod1.zig:26:5
             \\    frame #1: inline_call`mod1.m1pf(m1pa=78) at mod1.zig:12:14
-            \\    frame #2: inline_call`root0.main at root0.zig:38:14
+            \\    frame #2: inline_call`root0.main at root0.zig:40:14
             ,
-            \\  * frame #0: inline_call`mod1.m1pf [inlined] m1cfi(m1cai=70) at mod1.zig:29:5
+            \\  * frame #0: inline_call`mod1.m1pf [inlined] m1cfi(m1cai=70) at mod1.zig:30:5
             \\    frame #1: inline_call`mod1.m1pf(m1pa=78) at mod1.zig:13:15
-            \\    frame #2: inline_call`root0.main at root0.zig:38:14
+            \\    frame #2: inline_call`root0.main at root0.zig:40:14
             ,
             \\  * frame #0: inline_call`root0.r0cf(r0ca=88) at root0.zig:26:5
             \\    frame #1: inline_call`root0.main [inlined] m1pfi(m1pai=89) at mod1.zig:16:15
-            \\    frame #2: inline_call`root0.main at root0.zig:39:15
+            \\    frame #2: inline_call`root0.main at root0.zig:41:15
             ,
-            \\  * frame #0: inline_call`root0.main [inlined] r0cfi(r0cai=91) at root0.zig:29:5
+            \\  * frame #0: inline_call`root0.main [inlined] r0cfi(r0cai=91) at root0.zig:30:5
             \\    frame #1: inline_call`root0.main [inlined] m1pfi(m1pai=89) at mod1.zig:17:16
-            \\    frame #2: inline_call`root0.main at root0.zig:39:15
+            \\    frame #2: inline_call`root0.main at root0.zig:41:15
             ,
             \\  * frame #0: inline_call`root1.r1cf(r1ca=90) at root1.zig:26:5
             \\    frame #1: inline_call`root0.main [inlined] m1pfi(m1pai=89) at mod1.zig:18:15
-            \\    frame #2: inline_call`root0.main at root0.zig:39:15
+            \\    frame #2: inline_call`root0.main at root0.zig:41:15
             ,
-            \\  * frame #0: inline_call`root0.main [inlined] r1cfi(r1cai=93) at root1.zig:29:5
+            \\  * frame #0: inline_call`root0.main [inlined] r1cfi(r1cai=93) at root1.zig:30:5
             \\    frame #1: inline_call`root0.main [inlined] m1pfi(m1pai=89) at mod1.zig:19:16
-            \\    frame #2: inline_call`root0.main at root0.zig:39:15
+            \\    frame #2: inline_call`root0.main at root0.zig:41:15
             ,
             \\  * frame #0: inline_call`mod0.m0cf(m0ca=92) at mod0.zig:26:5
             \\    frame #1: inline_call`root0.main [inlined] m1pfi(m1pai=89) at mod1.zig:20:14
-            \\    frame #2: inline_call`root0.main at root0.zig:39:15
+            \\    frame #2: inline_call`root0.main at root0.zig:41:15
             ,
-            \\  * frame #0: inline_call`root0.main [inlined] m0cfi(m0cai=95) at mod0.zig:29:5
+            \\  * frame #0: inline_call`root0.main [inlined] m0cfi(m0cai=95) at mod0.zig:30:5
             \\    frame #1: inline_call`root0.main [inlined] m1pfi(m1pai=89) at mod1.zig:21:15
-            \\    frame #2: inline_call`root0.main at root0.zig:39:15
+            \\    frame #2: inline_call`root0.main at root0.zig:41:15
             ,
             \\  * frame #0: inline_call`mod1.m1cf(m1ca=94) at mod1.zig:26:5
             \\    frame #1: inline_call`root0.main [inlined] m1pfi(m1pai=89) at mod1.zig:22:14
-            \\    frame #2: inline_call`root0.main at root0.zig:39:15
+            \\    frame #2: inline_call`root0.main at root0.zig:41:15
             ,
-            \\  * frame #0: inline_call`root0.main [inlined] m1cfi(m1cai=81) at mod1.zig:29:5
+            \\  * frame #0: inline_call`root0.main [inlined] m1cfi(m1cai=81) at mod1.zig:30:5
             \\    frame #1: inline_call`root0.main [inlined] m1pfi(m1pai=89) at mod1.zig:23:15
-            \\    frame #2: inline_call`root0.main at root0.zig:39:15
+            \\    frame #2: inline_call`root0.main at root0.zig:41:15
         },
     );
     db.addLldbTest(