Commit 50b40c9621

Jacob Young <jacobly0@users.noreply.github.com>
2025-02-24 10:29:05
x86_64: rewrite wrapping add/sub
1 parent 6c3cbb0
Changed files (3)
src
arch
test
behavior
src/arch/x86_64/CodeGen.zig
@@ -2454,9 +2454,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
         try cg.inst_tracking.ensureUnusedCapacity(cg.gpa, 1);
         switch (air_tags[@intFromEnum(inst)]) {
             // zig fmt: off
-            .add_wrap        => try cg.airBinOp(inst, .add_wrap),
-            .sub_wrap        => try cg.airBinOp(inst, .sub_wrap),
-
             .shr, .shr_exact => try cg.airShlShrBinOp(inst),
             .shl, .shl_exact => try cg.airShlShrBinOp(inst),
 
@@ -2497,7 +2494,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     if (arg != .none) break;
                 } else try cg.airDbgVarArgs();
             },
-            .add, .add_optimized => |air_tag| if (use_old) try cg.airBinOp(inst, .add) else {
+            .add, .add_optimized, .add_wrap => |air_tag| if (use_old) try cg.airBinOp(inst, switch (air_tag) {
+                else => unreachable,
+                .add, .add_optimized => .add,
+                .add_wrap => .add_wrap,
+            }) else {
                 const bin_op = air_datas[@intFromEnum(inst)].bin_op;
                 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
                 var res: [1]Temp = undefined;
@@ -3399,10 +3400,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     },
                     .dst_temps = .{ .mem, .unused },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_size_add_elem_size), ._, ._ },
-                        .{ ._, ._, .lea, .tmp2p, .memia(.src1, .tmp0, .add_size_add_elem_size), ._, ._ },
-                        .{ ._, ._, .lea, .tmp3p, .memia(.dst0, .tmp0, .add_size_add_elem_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_unaligned_size_add_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memia(.src1, .tmp0, .add_unaligned_size_add_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp3p, .memia(.dst0, .tmp0, .add_unaligned_size_add_elem_size), ._, ._ },
                         .{ ._, ._, .mov, .tmp4p, .sa(.src0, .sub_elem_size_div_8), ._, ._ },
                         .{ ._, ._c, .cl, ._, ._, ._, ._ },
                         .{ .@"1:", ._, .mov, .tmp5q, .leasi(.tmp1q, .@"8", .tmp4), ._, ._ },
@@ -3437,10 +3438,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     },
                     .dst_temps = .{ .mem, .unused },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_size_add_elem_size), ._, ._ },
-                        .{ ._, ._, .lea, .tmp2p, .memia(.src1, .tmp0, .add_size_add_elem_size), ._, ._ },
-                        .{ ._, ._, .lea, .tmp3p, .memia(.dst0, .tmp0, .add_size_add_elem_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_unaligned_size_add_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memia(.src1, .tmp0, .add_unaligned_size_add_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp3p, .memia(.dst0, .tmp0, .add_unaligned_size_add_elem_size), ._, ._ },
                         .{ ._, ._, .mov, .tmp4p, .sa(.src0, .sub_elem_size_div_4), ._, ._ },
                         .{ ._, ._c, .cl, ._, ._, ._, ._ },
                         .{ .@"1:", ._, .mov, .tmp5d, .leasi(.tmp1d, .@"4", .tmp4), ._, ._ },
@@ -4352,10 +4353,26 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     }),
                     else => |e| return e,
                 };
+                switch (air_tag) {
+                    else => unreachable,
+                    .add, .add_optimized => {},
+                    .add_wrap => res[0].wrapInt(cg) catch |err| switch (err) {
+                        error.SelectFailed => return cg.fail("failed to select wrap {} {} {}", .{
+                            cg.typeOf(bin_op.lhs).fmt(pt),
+                            ops[0].tracking(cg),
+                            ops[1].tracking(cg),
+                        }),
+                        else => |e| return e,
+                    },
+                }
                 try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg);
             },
             .add_safe => unreachable,
-            .sub, .sub_optimized => |air_tag| if (use_old) try cg.airBinOp(inst, .sub) else {
+            .sub, .sub_optimized, .sub_wrap => |air_tag| if (use_old) try cg.airBinOp(inst, switch (air_tag) {
+                else => unreachable,
+                .sub, .sub_optimized => .sub,
+                .sub_wrap => .sub_wrap,
+            }) else {
                 const bin_op = air_datas[@intFromEnum(inst)].bin_op;
                 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
                 var res: [1]Temp = undefined;
@@ -5229,10 +5246,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     },
                     .dst_temps = .{ .mem, .unused },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_size_add_elem_size), ._, ._ },
-                        .{ ._, ._, .lea, .tmp2p, .memia(.src1, .tmp0, .add_size_add_elem_size), ._, ._ },
-                        .{ ._, ._, .lea, .tmp3p, .memia(.dst0, .tmp0, .add_size_add_elem_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_unaligned_size_add_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memia(.src1, .tmp0, .add_unaligned_size_add_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp3p, .memia(.dst0, .tmp0, .add_unaligned_size_add_elem_size), ._, ._ },
                         .{ ._, ._, .mov, .tmp4p, .sa(.src0, .sub_elem_size_div_8), ._, ._ },
                         .{ ._, ._c, .cl, ._, ._, ._, ._ },
                         .{ .@"1:", ._, .mov, .tmp5q, .leasi(.tmp1q, .@"8", .tmp4), ._, ._ },
@@ -5267,10 +5284,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     },
                     .dst_temps = .{ .mem, .unused },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_size_add_elem_size), ._, ._ },
-                        .{ ._, ._, .lea, .tmp2p, .memia(.src1, .tmp0, .add_size_add_elem_size), ._, ._ },
-                        .{ ._, ._, .lea, .tmp3p, .memia(.dst0, .tmp0, .add_size_add_elem_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_unaligned_size_add_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memia(.src1, .tmp0, .add_unaligned_size_add_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp3p, .memia(.dst0, .tmp0, .add_unaligned_size_add_elem_size), ._, ._ },
                         .{ ._, ._, .mov, .tmp4p, .sa(.src0, .sub_elem_size_div_4), ._, ._ },
                         .{ ._, ._c, .cl, ._, ._, ._, ._ },
                         .{ .@"1:", ._, .mov, .tmp5d, .leasi(.tmp1d, .@"4", .tmp4), ._, ._ },
@@ -6200,6 +6217,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     }),
                     else => |e| return e,
                 };
+                switch (air_tag) {
+                    else => unreachable,
+                    .sub, .sub_optimized => {},
+                    .sub_wrap => res[0].wrapInt(cg) catch |err| switch (err) {
+                        error.SelectFailed => return cg.fail("failed to select wrap {} {} {}", .{
+                            cg.typeOf(bin_op.lhs).fmt(pt),
+                            ops[0].tracking(cg),
+                            ops[1].tracking(cg),
+                        }),
+                        else => |e| return e,
+                    },
+                }
                 try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg);
             },
             .sub_safe => unreachable,
@@ -18547,8 +18576,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .sse2, null, null, null },
                     .src_constraints = .{
-                        .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } },
-                        .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .dword } },
                         .any,
                     },
                     .patterns = &.{
@@ -18682,8 +18711,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .sse2, null, null, null },
                     .src_constraints = .{
-                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } },
-                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .dword } },
                         .any,
                     },
                     .patterns = &.{
@@ -18819,8 +18848,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .sse2, null, null, null },
                     .src_constraints = .{
-                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
-                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_exact_unsigned_int = .{ .of = .xword, .is = 32 } },
+                        .{ .scalar_exact_unsigned_int = .{ .of = .xword, .is = 32 } },
                         .any,
                     },
                     .patterns = &.{
@@ -18971,8 +19000,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .sse2, null, null, null },
                     .src_constraints = .{
-                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
-                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_exact_unsigned_int = .{ .of = .xword, .is = 32 } },
+                        .{ .multiple_scalar_exact_unsigned_int = .{ .of = .xword, .is = 32 } },
                         .any,
                     },
                     .patterns = &.{
@@ -19082,8 +19111,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .avx, null, null, null },
                     .src_constraints = .{
-                        .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } },
-                        .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .qword } },
                         .any,
                     },
                     .patterns = &.{
@@ -19097,8 +19126,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .sse4_2, null, null, null },
                     .src_constraints = .{
-                        .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } },
-                        .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .qword } },
                         .any,
                     },
                     .patterns = &.{
@@ -19128,8 +19157,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .avx2, null, null, null },
                     .src_constraints = .{
-                        .{ .scalar_signed_int = .{ .of = .yword, .is = .qword } },
-                        .{ .scalar_signed_int = .{ .of = .yword, .is = .qword } },
+                        .{ .scalar_signed_or_exclusive_int = .{ .of = .yword, .is = .qword } },
+                        .{ .scalar_signed_or_exclusive_int = .{ .of = .yword, .is = .qword } },
                         .any,
                     },
                     .patterns = &.{
@@ -19143,8 +19172,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .avx2, null, null, null },
                     .src_constraints = .{
-                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .qword } },
-                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .qword } },
+                        .{ .multiple_scalar_signed_or_exclusive_int = .{ .of = .yword, .is = .qword } },
+                        .{ .multiple_scalar_signed_or_exclusive_int = .{ .of = .yword, .is = .qword } },
                         .any,
                     },
                     .patterns = &.{
@@ -19178,8 +19207,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .avx, null, null, null },
                     .src_constraints = .{
-                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } },
-                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .qword } },
                         .any,
                     },
                     .patterns = &.{
@@ -19213,8 +19242,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .sse4_2, null, null, null },
                     .src_constraints = .{
-                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } },
-                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .qword } },
                         .any,
                     },
                     .patterns = &.{
@@ -19318,8 +19347,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .avx, null, null, null },
                     .src_constraints = .{
-                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
-                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_exact_unsigned_int = .{ .of = .xword, .is = 64 } },
+                        .{ .scalar_exact_unsigned_int = .{ .of = .xword, .is = 64 } },
                         .any,
                     },
                     .patterns = &.{
@@ -19352,8 +19381,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .sse4_2, null, null, null },
                     .src_constraints = .{
-                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
-                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_exact_unsigned_int = .{ .of = .xword, .is = 64 } },
+                        .{ .scalar_exact_unsigned_int = .{ .of = .xword, .is = 64 } },
                         .any,
                     },
                     .patterns = &.{
@@ -19387,8 +19416,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .avx2, null, null, null },
                     .src_constraints = .{
-                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .qword } },
-                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .qword } },
+                        .{ .scalar_exact_unsigned_int = .{ .of = .yword, .is = 64 } },
+                        .{ .scalar_exact_unsigned_int = .{ .of = .yword, .is = 64 } },
                         .any,
                     },
                     .patterns = &.{
@@ -19421,8 +19450,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .avx2, null, null, null },
                     .src_constraints = .{
-                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .qword } },
-                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .qword } },
+                        .{ .multiple_scalar_exact_unsigned_int = .{ .of = .yword, .is = 64 } },
+                        .{ .multiple_scalar_exact_unsigned_int = .{ .of = .yword, .is = 64 } },
                         .any,
                     },
                     .patterns = &.{
@@ -19460,8 +19489,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .avx, null, null, null },
                     .src_constraints = .{
-                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
-                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_exact_unsigned_int = .{ .of = .xword, .is = 64 } },
+                        .{ .multiple_scalar_exact_unsigned_int = .{ .of = .xword, .is = 64 } },
                         .any,
                     },
                     .patterns = &.{
@@ -19499,8 +19528,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .sse4_2, null, null, null },
                     .src_constraints = .{
-                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
-                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_exact_unsigned_int = .{ .of = .xword, .is = 64 } },
+                        .{ .multiple_scalar_exact_unsigned_int = .{ .of = .xword, .is = 64 } },
                         .any,
                     },
                     .patterns = &.{
@@ -19628,20 +19657,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{ .mem, .unused },
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .sia(-1, .none, .add_src0_elem_size_div_8), ._, ._ },
                         .{ ._, ._c, .cl, ._, ._, ._, ._ },
-                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_unaligned_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp0p, .lead(.tmp0, 8), ._, ._ },
                         .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
-                        .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .memiad(.src0, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
-                        .{ ._, ._, .lea, .tmp2p, .memiad(.src1, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memiad(.src0, .tmp0, .add_unaligned_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memiad(.src1, .tmp0, .add_unaligned_size_sub_elem_size, 8), ._, ._ },
                         .{ ._, ._l, .cmov, .tmp1p, .tmp2p, ._, ._ },
-                        .{ ._, ._, .lea, .tmp2p, .memiad(.dst0, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memiad(.dst0, .tmp0, .add_unaligned_size_sub_elem_size, 8), ._, ._ },
                         .{ ._, ._, .mov, .tmp3d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
@@ -19669,20 +19698,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{ .mem, .unused },
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .sia(-1, .none, .add_src0_elem_size_div_8), ._, ._ },
                         .{ ._, ._c, .cl, ._, ._, ._, ._ },
-                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_unaligned_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp0p, .lead(.tmp0, 8), ._, ._ },
                         .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
-                        .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .memiad(.src0, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memiad(.src0, .tmp0, .add_unaligned_size_sub_elem_size, 8), ._, ._ },
                         .{ ._, ._nl, .j, .@"1f", ._, ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .memiad(.src1, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
-                        .{ .@"1:", ._, .lea, .tmp2p, .memiad(.dst0, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memiad(.src1, .tmp0, .add_unaligned_size_sub_elem_size, 8), ._, ._ },
+                        .{ .@"1:", ._, .lea, .tmp2p, .memiad(.dst0, .tmp0, .add_unaligned_size_sub_elem_size, 8), ._, ._ },
                         .{ ._, ._, .mov, .tmp3d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
@@ -19710,18 +19739,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{ .mem, .unused },
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
                         .{ ._, ._c, .cl, ._, ._, ._, ._ },
-                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_unaligned_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp0p, .lead(.tmp0, 8), ._, ._ },
                         .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_size_sub_elem_size), ._, ._ },
-                        .{ ._, ._, .lea, .tmp2p, .memia(.src1, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_unaligned_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memia(.src1, .tmp0, .add_unaligned_size_sub_elem_size), ._, ._ },
                         .{ ._, ._b, .cmov, .tmp1p, .tmp2p, ._, ._ },
-                        .{ ._, ._, .lea, .tmp2p, .memia(.dst0, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memia(.dst0, .tmp0, .add_unaligned_size_sub_elem_size), ._, ._ },
                         .{ ._, ._, .mov, .tmp3d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
                         .{ ._, ._, .@"test", .tmp0p, .tmp0p, ._, ._ },
@@ -19749,18 +19778,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{ .mem, .unused },
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
                         .{ ._, ._c, .cl, ._, ._, ._, ._ },
-                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_unaligned_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp0p, .lead(.tmp0, 8), ._, ._ },
                         .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_unaligned_size_sub_elem_size), ._, ._ },
                         .{ ._, ._nb, .j, .@"1f", ._, ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .memia(.src1, .tmp0, .add_size_sub_elem_size), ._, ._ },
-                        .{ .@"1:", ._, .lea, .tmp2p, .memia(.dst0, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memia(.src1, .tmp0, .add_unaligned_size_sub_elem_size), ._, ._ },
+                        .{ .@"1:", ._, .lea, .tmp2p, .memia(.dst0, .tmp0, .add_unaligned_size_sub_elem_size), ._, ._ },
                         .{ ._, ._, .mov, .tmp3d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
                         .{ ._, ._, .@"test", .tmp0p, .tmp0p, ._, ._ },
@@ -22702,8 +22731,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .sse2, null, null, null },
                     .src_constraints = .{
-                        .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } },
-                        .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .dword } },
                         .any,
                     },
                     .patterns = &.{
@@ -22837,8 +22866,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .sse2, null, null, null },
                     .src_constraints = .{
-                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } },
-                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .dword } },
                         .any,
                     },
                     .patterns = &.{
@@ -22974,8 +23003,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .sse2, null, null, null },
                     .src_constraints = .{
-                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
-                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_exact_unsigned_int = .{ .of = .xword, .is = 32 } },
+                        .{ .scalar_exact_unsigned_int = .{ .of = .xword, .is = 32 } },
                         .any,
                     },
                     .patterns = &.{
@@ -23126,8 +23155,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .sse2, null, null, null },
                     .src_constraints = .{
-                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
-                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_exact_unsigned_int = .{ .of = .xword, .is = 32 } },
+                        .{ .multiple_scalar_exact_unsigned_int = .{ .of = .xword, .is = 32 } },
                         .any,
                     },
                     .patterns = &.{
@@ -23237,8 +23266,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .avx, null, null, null },
                     .src_constraints = .{
-                        .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } },
-                        .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .qword } },
                         .any,
                     },
                     .patterns = &.{
@@ -23254,8 +23283,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .sse4_2, null, null, null },
                     .src_constraints = .{
-                        .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } },
-                        .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .qword } },
                         .any,
                     },
                     .patterns = &.{
@@ -23285,8 +23314,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .avx2, null, null, null },
                     .src_constraints = .{
-                        .{ .scalar_signed_int = .{ .of = .yword, .is = .qword } },
-                        .{ .scalar_signed_int = .{ .of = .yword, .is = .qword } },
+                        .{ .scalar_signed_or_exclusive_int = .{ .of = .yword, .is = .qword } },
+                        .{ .scalar_signed_or_exclusive_int = .{ .of = .yword, .is = .qword } },
                         .any,
                     },
                     .patterns = &.{
@@ -23302,8 +23331,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .avx2, null, null, null },
                     .src_constraints = .{
-                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .qword } },
-                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .qword } },
+                        .{ .multiple_scalar_signed_or_exclusive_int = .{ .of = .yword, .is = .qword } },
+                        .{ .multiple_scalar_signed_or_exclusive_int = .{ .of = .yword, .is = .qword } },
                         .any,
                     },
                     .patterns = &.{
@@ -23337,8 +23366,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .avx, null, null, null },
                     .src_constraints = .{
-                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } },
-                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .qword } },
                         .any,
                     },
                     .patterns = &.{
@@ -23372,8 +23401,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .sse4_2, null, null, null },
                     .src_constraints = .{
-                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } },
-                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_signed_or_exclusive_int = .{ .of = .xword, .is = .qword } },
                         .any,
                     },
                     .patterns = &.{
@@ -23477,8 +23506,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .avx, null, null, null },
                     .src_constraints = .{
-                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
-                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_exact_unsigned_int = .{ .of = .xword, .is = 64 } },
+                        .{ .scalar_exact_unsigned_int = .{ .of = .xword, .is = 64 } },
                         .any,
                     },
                     .patterns = &.{
@@ -23511,8 +23540,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .sse4_2, null, null, null },
                     .src_constraints = .{
-                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
-                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_exact_unsigned_int = .{ .of = .xword, .is = 64 } },
+                        .{ .scalar_exact_unsigned_int = .{ .of = .xword, .is = 64 } },
                         .any,
                     },
                     .patterns = &.{
@@ -23546,8 +23575,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .avx2, null, null, null },
                     .src_constraints = .{
-                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .qword } },
-                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .qword } },
+                        .{ .scalar_exact_unsigned_int = .{ .of = .yword, .is = 64 } },
+                        .{ .scalar_exact_unsigned_int = .{ .of = .yword, .is = 64 } },
                         .any,
                     },
                     .patterns = &.{
@@ -23580,8 +23609,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .avx2, null, null, null },
                     .src_constraints = .{
-                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .qword } },
-                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .qword } },
+                        .{ .multiple_scalar_exact_unsigned_int = .{ .of = .yword, .is = 64 } },
+                        .{ .multiple_scalar_exact_unsigned_int = .{ .of = .yword, .is = 64 } },
                         .any,
                     },
                     .patterns = &.{
@@ -23619,8 +23648,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .avx, null, null, null },
                     .src_constraints = .{
-                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
-                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_exact_unsigned_int = .{ .of = .xword, .is = 64 } },
+                        .{ .multiple_scalar_exact_unsigned_int = .{ .of = .xword, .is = 64 } },
                         .any,
                     },
                     .patterns = &.{
@@ -23658,8 +23687,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .sse4_2, null, null, null },
                     .src_constraints = .{
-                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
-                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_exact_unsigned_int = .{ .of = .xword, .is = 64 } },
+                        .{ .multiple_scalar_exact_unsigned_int = .{ .of = .xword, .is = 64 } },
                         .any,
                     },
                     .patterns = &.{
@@ -23787,20 +23816,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{ .mem, .unused },
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .sia(-1, .none, .add_src0_elem_size_div_8), ._, ._ },
                         .{ ._, ._c, .cl, ._, ._, ._, ._ },
-                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_unaligned_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp0p, .lead(.tmp0, 8), ._, ._ },
                         .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
-                        .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .memiad(.src0, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
-                        .{ ._, ._, .lea, .tmp2p, .memiad(.src1, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memiad(.src0, .tmp0, .add_unaligned_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memiad(.src1, .tmp0, .add_unaligned_size_sub_elem_size, 8), ._, ._ },
                         .{ ._, ._ge, .cmov, .tmp1p, .tmp2p, ._, ._ },
-                        .{ ._, ._, .lea, .tmp2p, .memiad(.dst0, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memiad(.dst0, .tmp0, .add_unaligned_size_sub_elem_size, 8), ._, ._ },
                         .{ ._, ._, .mov, .tmp3d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
@@ -23828,20 +23857,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{ .mem, .unused },
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .sia(-1, .none, .add_src0_elem_size_div_8), ._, ._ },
                         .{ ._, ._c, .cl, ._, ._, ._, ._ },
-                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_unaligned_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp0p, .lead(.tmp0, 8), ._, ._ },
                         .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
-                        .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .memiad(.src0, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memiad(.src0, .tmp0, .add_unaligned_size_sub_elem_size, 8), ._, ._ },
                         .{ ._, ._nge, .j, .@"1f", ._, ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .memiad(.src1, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
-                        .{ .@"1:", ._, .lea, .tmp2p, .memiad(.dst0, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memiad(.src1, .tmp0, .add_unaligned_size_sub_elem_size, 8), ._, ._ },
+                        .{ .@"1:", ._, .lea, .tmp2p, .memiad(.dst0, .tmp0, .add_unaligned_size_sub_elem_size, 8), ._, ._ },
                         .{ ._, ._, .mov, .tmp3d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
@@ -23869,18 +23898,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{ .mem, .unused },
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
                         .{ ._, ._c, .cl, ._, ._, ._, ._ },
-                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_unaligned_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp0p, .lead(.tmp0, 8), ._, ._ },
                         .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_size_sub_elem_size), ._, ._ },
-                        .{ ._, ._, .lea, .tmp2p, .memia(.src1, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_unaligned_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memia(.src1, .tmp0, .add_unaligned_size_sub_elem_size), ._, ._ },
                         .{ ._, ._ae, .cmov, .tmp1p, .tmp2p, ._, ._ },
-                        .{ ._, ._, .lea, .tmp2p, .memia(.dst0, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memia(.dst0, .tmp0, .add_unaligned_size_sub_elem_size), ._, ._ },
                         .{ ._, ._, .mov, .tmp3d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
                         .{ ._, ._, .@"test", .tmp0p, .tmp0p, ._, ._ },
@@ -23908,18 +23937,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{ .mem, .unused },
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
                         .{ ._, ._c, .cl, ._, ._, ._, ._ },
-                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_unaligned_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp0p, .lead(.tmp0, 8), ._, ._ },
                         .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_unaligned_size_sub_elem_size), ._, ._ },
                         .{ ._, ._nae, .j, .@"1f", ._, ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .memia(.src1, .tmp0, .add_size_sub_elem_size), ._, ._ },
-                        .{ .@"1:", ._, .lea, .tmp2p, .memia(.dst0, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memia(.src1, .tmp0, .add_unaligned_size_sub_elem_size), ._, ._ },
+                        .{ .@"1:", ._, .lea, .tmp2p, .memia(.dst0, .tmp0, .add_unaligned_size_sub_elem_size), ._, ._ },
                         .{ ._, ._, .mov, .tmp3d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
                         .{ ._, ._, .@"test", .tmp0p, .tmp0p, ._, ._ },
@@ -39252,7 +39281,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
                                 .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ },
                                 .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ },
-                                .{ ._, ._b, .j, .@"1b", ._, ._, ._ },
+                                .{ ._, ._a, .j, .@"1b", ._, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
                                 .{ ._, .fromCond(cc), .set, .tmp2b, ._, ._, ._ },
                                 .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ },
@@ -39434,7 +39463,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
                                 .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ },
                                 .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ },
-                                .{ ._, ._b, .j, .@"1b", ._, ._, ._ },
+                                .{ ._, ._a, .j, .@"1b", ._, ._, ._ },
                                 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
                                 .{ ._, .fromCond(cc), .set, .tmp2b, ._, ._, ._ },
@@ -39620,7 +39649,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                                 .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ },
                                 .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ },
                                 .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ },
-                                .{ ._, ._b, .j, .@"1b", ._, ._, ._ },
+                                .{ ._, ._a, .j, .@"1b", ._, ._, ._ },
                                 .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ },
                                 .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ },
                                 .{ ._, .fromCond(cc), .set, .tmp2b, ._, ._, ._ },
@@ -59051,8 +59080,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._, ._ },
-                        .{ ._, .vp_d, .sll, .tmp1y, .tmp1y, .uia(16, .dst0, .sub_bit_size), ._ },
-                        .{ ._, .vp_d, .sra, .tmp1y, .tmp1y, .uia(16, .dst0, .sub_bit_size), ._ },
+                        .{ ._, .vp_d, .sll, .tmp1y, .tmp1y, .uia(32, .dst0, .sub_bit_size), ._ },
+                        .{ ._, .vp_d, .sra, .tmp1y, .tmp1y, .uia(32, .dst0, .sub_bit_size), ._ },
                         .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
@@ -59113,8 +59142,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
-                        .{ ._, .vp_d, .sll, .tmp1x, .tmp1x, .uia(16, .dst0, .sub_bit_size), ._ },
-                        .{ ._, .vp_d, .sra, .tmp1x, .tmp1x, .uia(16, .dst0, .sub_bit_size), ._ },
+                        .{ ._, .vp_d, .sll, .tmp1x, .tmp1x, .uia(32, .dst0, .sub_bit_size), ._ },
+                        .{ ._, .vp_d, .sra, .tmp1x, .tmp1x, .uia(32, .dst0, .sub_bit_size), ._ },
                         .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
@@ -59175,8 +59204,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
-                        .{ ._, .p_d, .sll, .tmp1x, .uia(16, .dst0, .sub_bit_size), ._, ._ },
-                        .{ ._, .p_d, .sra, .tmp1x, .uia(16, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, .p_d, .sll, .tmp1x, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, .p_d, .sra, .tmp1x, .uia(32, .dst0, .sub_bit_size), ._, ._ },
                         .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
@@ -60838,7 +60867,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .lea(.tmp2q), .tmp3q, ._, ._ },
                         .{ ._, ._r, .sa, .tmp3q, .ui(63), ._, ._ },
                         .{ ._, ._, .mov, .lead(.tmp2q, 8), .tmp3q, ._, ._ },
-                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sia2(16, .src0, .dst0, .add_delta_elem_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp2p, .lead(.tmp2, 16), ._, ._ },
                         .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
                         .{ ._, ._a, .j, .@"0b", ._, ._, ._ },
@@ -60875,7 +60904,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .lea(.tmp2q), .tmp3q, ._, ._ },
                         .{ ._, ._r, .sa, .tmp3q, .ui(63), ._, ._ },
                         .{ ._, ._, .mov, .lead(.tmp2q, 8), .tmp3q, ._, ._ },
-                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sia2(16, .src0, .dst0, .add_delta_elem_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp2p, .lead(.tmp2, 16), ._, ._ },
                         .{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
@@ -60914,7 +60943,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .lea(.tmp2q), .tmp3q, ._, ._ },
                         .{ ._, ._r, .sa, .tmp3q, .ui(63), ._, ._ },
                         .{ ._, ._, .mov, .lead(.tmp2q, 8), .tmp3q, ._, ._ },
-                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sia2(16, .src0, .dst0, .add_delta_elem_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp2p, .lead(.tmp2, 16), ._, ._ },
                         .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
                         .{ ._, ._a, .j, .@"0b", ._, ._, ._ },
@@ -60953,7 +60982,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .lea(.tmp2q), .tmp3q, ._, ._ },
                         .{ ._, ._r, .sa, .tmp3q, .ui(63), ._, ._ },
                         .{ ._, ._, .mov, .lead(.tmp2q, 8), .tmp3q, ._, ._ },
-                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sia2(16, .src0, .dst0, .add_delta_elem_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp2p, .lead(.tmp2, 16), ._, ._ },
                         .{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
@@ -61130,7 +61159,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .bzhi, .tmp4q, .lea(.tmp1q), .tmp3q, ._ },
                         .{ ._, ._, .mov, .lea(.tmp2q), .tmp4q, ._, ._ },
                         .{ ._, ._, .mov, .lead(.tmp2q, 8), .si(0), ._, ._ },
-                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sia2(16, .src0, .dst0, .add_delta_elem_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp2p, .lead(.tmp2, 16), ._, ._ },
                         .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
                         .{ ._, ._a, .j, .@"0b", ._, ._, ._ },
@@ -61167,7 +61196,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .bzhi, .tmp4q, .lea(.tmp1q), .tmp3q, ._ },
                         .{ ._, ._, .mov, .lea(.tmp2q), .tmp4q, ._, ._ },
                         .{ ._, ._, .mov, .lead(.tmp2q, 8), .si(0), ._, ._ },
-                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sia2(16, .src0, .dst0, .add_delta_elem_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp2p, .lead(.tmp2, 16), ._, ._ },
                         .{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
@@ -61274,7 +61303,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .@"and", .tmp3q, .lea(.tmp1q), ._, ._ },
                         .{ ._, ._, .mov, .lea(.tmp2q), .tmp3q, ._, ._ },
                         .{ ._, ._, .mov, .lead(.tmp2q, 8), .si(0), ._, ._ },
-                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sia2(16, .src0, .dst0, .add_delta_elem_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp2p, .lead(.tmp2, 16), ._, ._ },
                         .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
                         .{ ._, ._a, .j, .@"0b", ._, ._, ._ },
@@ -61311,7 +61340,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .@"and", .tmp3q, .lea(.tmp1q), ._, ._ },
                         .{ ._, ._, .mov, .lea(.tmp2q), .tmp3q, ._, ._ },
                         .{ ._, ._, .mov, .lead(.tmp2q, 8), .si(0), ._, ._ },
-                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sia2(16, .src0, .dst0, .add_delta_elem_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp2p, .lead(.tmp2, 16), ._, ._ },
                         .{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
@@ -94747,7 +94776,7 @@ const MoveStrategy = union(enum) {
                 });
             },
             .load_store => |tag| {
-                if (tag[0] == ._ps and tag[1] == .movl) try cg.asmRegisterRegister(.{ .p_, .xor }, dst_reg, dst_reg);
+                if (tag[0] == ._ps and tag[1] == .movl) try cg.asmRegisterRegister(.{ ._ps, .xor }, dst_reg, dst_reg);
                 try cg.asmRegisterMemory(tag, switch (tag[1]) {
                     else => dst_reg,
                     .lea => if (dst_reg.bitSize() >= 32) dst_reg else dst_reg.to32(),
@@ -101383,454 +101412,2823 @@ const Temp = struct {
         try cg.asmOpOnly(.{ .@"rep _sb", .sto });
     }
 
-    /// Supports any `op` using `cg.intInfo(lhs.typeOf(cg)).?.signedness` as the signedness.
-    /// Returns `error.SelectFailed` when `cg.intInfo(lhs.typeOf(cg)) == null`.
-    fn cmpInts(lhs: *Temp, op: std.math.CompareOperator, rhs: *Temp, cg: *CodeGen) Select.Error!Temp {
-        var ops: [2]Temp = .{ lhs.*, rhs.* };
+    fn wrapInt(temp: *Temp, cg: *CodeGen) Select.Error!void {
+        var ops: [1]Temp = .{temp.*};
         var res: [1]Temp = undefined;
-        switch (op) {
-            .lt, .lte, .gte, .gt => {
-                const commute = switch (op) {
-                    .lt, .gte => false,
-                    .lte, .gt => true,
-                    else => unreachable,
-                };
-                if (commute) std.mem.swap(Temp, &ops[0], &ops[1]);
-                try cg.select(&res, &.{.bool}, &ops, comptime &.{ .{
-                    .src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .imm8, .mem, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .imm8, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                    },
-                    .dst_temps = .{ .{ .cc = .g }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .mem, .imm8, .none } },
-                        .{ .src = .{ .to_gpr, .imm8, .none } },
-                        .{ .src = .{ .to_gpr, .mem, .none } },
-                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
-                    },
-                    .dst_temps = .{ .{ .cc = .l }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .imm8, .mem, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .imm8, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                    },
-                    .dst_temps = .{ .{ .cc = .a }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .mem, .imm8, .none } },
-                        .{ .src = .{ .to_gpr, .imm8, .none } },
-                        .{ .src = .{ .to_gpr, .mem, .none } },
-                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
-                    },
-                    .dst_temps = .{ .{ .cc = .b }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .imm16, .mem, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .imm16, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                    },
-                    .dst_temps = .{ .{ .cc = .g }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .mem, .imm16, .none } },
-                        .{ .src = .{ .to_gpr, .imm16, .none } },
-                        .{ .src = .{ .to_gpr, .mem, .none } },
-                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
-                    },
-                    .dst_temps = .{ .{ .cc = .l }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .imm16, .mem, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .imm16, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                    },
-                    .dst_temps = .{ .{ .cc = .a }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .mem, .imm16, .none } },
-                        .{ .src = .{ .to_gpr, .imm16, .none } },
-                        .{ .src = .{ .to_gpr, .mem, .none } },
-                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
-                    },
-                    .dst_temps = .{ .{ .cc = .b }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .signed_int = .dword }, .{ .signed_int = .dword }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .imm32, .mem, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .imm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                    },
-                    .dst_temps = .{ .{ .cc = .g }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .signed_int = .dword }, .{ .signed_int = .dword }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .mem, .imm32, .none } },
-                        .{ .src = .{ .to_gpr, .imm32, .none } },
-                        .{ .src = .{ .to_gpr, .mem, .none } },
-                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
-                    },
-                    .dst_temps = .{ .{ .cc = .l }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .imm32, .mem, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .imm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                    },
-                    .dst_temps = .{ .{ .cc = .a }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .mem, .imm32, .none } },
-                        .{ .src = .{ .to_gpr, .imm32, .none } },
-                        .{ .src = .{ .to_gpr, .mem, .none } },
-                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
-                    },
-                    .dst_temps = .{ .{ .cc = .b }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
-                    } },
-                }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .{ .signed_int = .qword }, .{ .signed_int = .qword }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .simm32, .mem, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .simm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                    },
-                    .dst_temps = .{ .{ .cc = .g }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
-                    } },
-                }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .{ .signed_int = .qword }, .{ .signed_int = .qword }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .mem, .simm32, .none } },
-                        .{ .src = .{ .to_gpr, .simm32, .none } },
-                        .{ .src = .{ .to_gpr, .mem, .none } },
-                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
-                    },
-                    .dst_temps = .{ .{ .cc = .l }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
-                    } },
-                }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .{ .unsigned_int = .qword }, .{ .unsigned_int = .qword }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .simm32, .mem, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .simm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                    },
-                    .dst_temps = .{ .{ .cc = .a }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
-                    } },
-                }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .{ .unsigned_int = .qword }, .{ .unsigned_int = .qword }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .mem, .simm32, .none } },
-                        .{ .src = .{ .to_gpr, .simm32, .none } },
-                        .{ .src = .{ .to_gpr, .mem, .none } },
-                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
-                    },
-                    .dst_temps = .{ .{ .cc = .b }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
-                    } },
-                }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{
-                        .{ .remainder_signed_int = .{ .of = .qword, .is = .qword } },
-                        .{ .remainder_signed_int = .{ .of = .qword, .is = .qword } },
-                        .any,
-                    },
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .to_mem, .none } },
-                    },
-                    .extra_temps = .{
-                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                        .{ .type = .i64, .kind = .{ .rc = .general_purpose } },
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                    },
-                    .dst_temps = .{ .{ .cc = .l }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sia(1, .src0, .sub_size_div_8), ._, ._ },
-                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
-                        .{ .@"0:", ._, .mov, .tmp1q, .memsiad(.src0q, .@"8", .tmp0, .add_size, -8), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp1q, .memsiad(.src1q, .@"8", .tmp0, .add_size, -8), ._, ._ },
-                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
-                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
-                        .{ ._, ._, .mov, .tmp1q, .memad(.src0q, .add_size, -8), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp1q, .memad(.src1q, .add_size, -8), ._, ._ },
-                    } },
-                }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{
-                        .{ .remainder_unsigned_int = .{ .of = .qword, .is = .qword } },
-                        .{ .remainder_unsigned_int = .{ .of = .qword, .is = .qword } },
-                        .any,
-                    },
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .to_mem, .none } },
-                    },
-                    .extra_temps = .{
-                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                    },
-                    .dst_temps = .{ .{ .cc = .b }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size_div_8), ._, ._ },
-                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
-                        .{ .@"0:", ._, .mov, .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp1q, .memsia(.src1q, .@"8", .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
-                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{
-                        .{ .remainder_signed_int = .{ .of = .dword, .is = .dword } },
-                        .{ .remainder_signed_int = .{ .of = .dword, .is = .dword } },
-                        .any,
-                    },
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .to_mem, .none } },
-                    },
-                    .extra_temps = .{
-                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                        .{ .type = .i32, .kind = .{ .rc = .general_purpose } },
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                    },
-                    .dst_temps = .{ .{ .cc = .l }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sia(1, .src0, .sub_size_div_4), ._, ._ },
-                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
-                        .{ .@"0:", ._, .mov, .tmp1q, .memsiad(.src0q, .@"4", .tmp0, .add_size, -4), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp1q, .memsiad(.src1q, .@"4", .tmp0, .add_size, -4), ._, ._ },
-                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
-                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
-                        .{ ._, ._, .mov, .tmp1q, .memad(.src0q, .add_size, -4), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp1q, .memad(.src1q, .add_size, -4), ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{
-                        .{ .remainder_unsigned_int = .{ .of = .dword, .is = .dword } },
-                        .{ .remainder_unsigned_int = .{ .of = .dword, .is = .dword } },
-                        .any,
-                    },
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .to_mem, .none } },
-                    },
-                    .extra_temps = .{
-                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
-                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                    },
-                    .dst_temps = .{ .{ .cc = .b }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size_div_4), ._, ._ },
-                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
-                        .{ .@"0:", ._, .mov, .tmp1q, .memsia(.src0q, .@"4", .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._, .sbb, .tmp1q, .memsia(.src1q, .@"4", .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
-                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
-                    } },
-                } });
-                if (commute) std.mem.swap(Temp, &ops[0], &ops[1]);
+        try cg.select(&res, &.{temp.typeOf(cg)}, &ops, comptime &.{ .{
+            .src_constraints = .{ .{ .exact_int = 8 }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_gpr, .none, .none } },
             },
-            .eq, .neq => {
-                try cg.select(&res, &.{.bool}, &ops, comptime &.{ .{
-                    .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .mem, .imm8, .none } },
-                        .{ .src = .{ .imm8, .mem, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .to_gpr, .imm8, .none } },
-                        .{ .src = .{ .imm8, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .to_gpr, .mem, .none } },
-                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
-                    },
-                    .dst_temps = .{ .{ .cc = .e }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .int = .word }, .{ .int = .word }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .mem, .imm16, .none } },
-                        .{ .src = .{ .imm16, .mem, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .to_gpr, .imm16, .none } },
-                        .{ .src = .{ .imm16, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .to_gpr, .mem, .none } },
-                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
-                    },
-                    .dst_temps = .{ .{ .cc = .e }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .mem, .imm32, .none } },
-                        .{ .src = .{ .imm32, .mem, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .to_gpr, .imm32, .none } },
-                        .{ .src = .{ .imm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .to_gpr, .mem, .none } },
-                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
-                    },
-                    .dst_temps = .{ .{ .cc = .e }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
-                    } },
-                }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .mem, .simm32, .none } },
-                        .{ .src = .{ .simm32, .mem, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .to_gpr, .simm32, .none } },
-                        .{ .src = .{ .simm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .to_gpr, .mem, .none } },
-                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
-                    },
-                    .dst_temps = .{ .{ .cc = .e }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
-                    } },
-                }, .{
-                    .required_features = .{ .sse, .mmx, null, null },
-                    .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword }, .any },
-                    .patterns = &.{
-                        .{ .src = .{ .to_mut_mm, .mem, .none } },
-                        .{ .src = .{ .mem, .to_mut_mm, .none }, .commute = .{ 0, 1 } },
-                        .{ .src = .{ .to_mut_mm, .to_mm, .none } },
-                    },
-                    .extra_temps = .{
-                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
-                        .{ .kind = .{ .rc = .mmx } },
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                        .unused,
-                    },
-                    .dst_temps = .{ .{ .cc = .z }, .unused },
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ },
-                        .{ ._, .p_, .xor, .src0q, .src1q, ._, ._ },
-                        .{ ._, .p_b, .cmpeq, .tmp1q, .src0q, ._, ._ },
-                        .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .each = .{ .once = &.{} },
+        }, .{
+            .src_constraints = .{ .{ .signed_int = .byte }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mut_gpr, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._l, .sa, .dst0b, .uia(8, .src0, .sub_bit_size), ._, ._ },
+                .{ ._, ._r, .sa, .dst0b, .uia(8, .src0, .sub_bit_size), ._, ._ },
+            } },
+        }, .{
+            .src_constraints = .{ .{ .unsigned_int = .byte }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_gpr, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .@"and", .dst0b, .ua(.src0, .add_umax), ._, ._ },
+            } },
+        }, .{
+            .src_constraints = .{ .{ .exact_int = 16 }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_gpr, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .each = .{ .once = &.{} },
+        }, .{
+            .required_features = .{ .fast_imm16, null, null, null },
+            .src_constraints = .{ .{ .unsigned_int = .word }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_gpr, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .@"and", .dst0w, .ua(.src0, .add_umax), ._, ._ },
+            } },
+        }, .{
+            .src_constraints = .{ .{ .exact_int = 32 }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_gpr, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .each = .{ .once = &.{} },
+        }, .{
+            .src_constraints = .{ .{ .signed_int = .dword }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mut_gpr, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._l, .sa, .dst0d, .uia(32, .src0, .sub_bit_size), ._, ._ },
+                .{ ._, ._r, .sa, .dst0d, .uia(32, .src0, .sub_bit_size), ._, ._ },
+            } },
+        }, .{
+            .src_constraints = .{ .{ .unsigned_int = .dword }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_gpr, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .@"and", .dst0d, .ua(.src0, .add_umax), ._, ._ },
+            } },
+        }, .{
+            .required_features = .{ .@"64bit", null, null, null },
+            .src_constraints = .{ .{ .exact_int = 64 }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_gpr, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .each = .{ .once = &.{} },
+        }, .{
+            .required_features = .{ .@"64bit", null, null, null },
+            .src_constraints = .{ .{ .signed_int = .qword }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mut_gpr, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._l, .sa, .dst0q, .uia(64, .src0, .sub_bit_size), ._, ._ },
+                .{ ._, ._r, .sa, .dst0q, .uia(64, .src0, .sub_bit_size), ._, ._ },
+            } },
+        }, .{
+            .required_features = .{ .@"64bit", .bmi2, null, null },
+            .src_constraints = .{ .{ .unsigned_int = .qword }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_gpr, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused },
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_bit_size), ._, ._ },
+                .{ ._, ._, .bzhi, .dst0q, .src0q, .tmp0q, ._ },
+            } },
+        }, .{
+            .required_features = .{ .@"64bit", null, null, null },
+            .src_constraints = .{ .{ .unsigned_int = .qword }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_gpr, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ },
+                .{ ._, ._, .@"and", .dst0q, .tmp0q, ._, ._ },
+            } },
+        }, .{
+            .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .each = .{ .once = &.{} },
+        }, .{
+            .required_features = .{ .@"64bit", null, null, null }, // Case gated on the 64bit feature.
+            .src_constraints = .{ .{ .remainder_signed_int = .{ .of = .xword, .is = .qword } }, .any, .any }, // src0: `remainder_signed_int` (of xword, is qword) — presumably a signed int whose last 16-byte limb holds at most a qword of significant bits; confirm against the constraint definition.
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } }, // Only a mutable-memory src0 is accepted.
+            },
+            .extra_temps = .{
+                .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp0: one 64-bit general-purpose scratch.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result aliases src0 (modified in place).
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0q, .memad(.src0q, .add_size, -16), ._, ._ }, // Load the qword at displacement -16 from the `.add_size` adjusted address of src0 (presumably 16 bytes before its end).
+                .{ ._, ._l, .sa, .tmp0q, .uia(64, .src0, .sub_bit_size_rem_64), ._, ._ }, // `_l` + `sa`: presumably `sal` by 64 adjusted by `.sub_bit_size_rem_64`, pushing the top significant bit to bit 63.
+                .{ ._, ._r, .sa, .tmp0q, .uia(64, .src0, .sub_bit_size_rem_64), ._, ._ }, // `_r` + `sa`: presumably `sar` by the same amount, sign-extending the value.
+                .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ }, // Store the shifted qword back at displacement -16.
+                .{ ._, ._r, .sa, .tmp0q, .ui(63), ._, ._ }, // Shift right by 63 — presumably leaves only copies of the sign bit.
+                .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ }, // Store that value into the qword at displacement -8.
+            } },
+        }, .{
+            .required_features = .{ .@"64bit", null, null, null }, // Case gated on the 64bit feature.
+            .src_constraints = .{ .{ .remainder_unsigned_int = .{ .of = .xword, .is = .qword } }, .any, .any }, // src0: `remainder_unsigned_int` (of xword, is qword) — presumably the unsigned counterpart of the signed xword/qword case; confirm against the constraint definition.
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } }, // Only a mutable-memory src0 is accepted.
+            },
+            .extra_temps = .{
+                .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp0: one 64-bit general-purpose scratch.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result aliases src0 (modified in place).
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, // tmp0 = immediate built from src0's `.add_umax` adjustment (presumably the mask of valid bits).
+                .{ ._, ._, .@"and", .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ }, // AND tmp0 into the qword at displacement -16.
+                .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ }, // Write 0 to the qword at displacement -8.
+            } },
+        }, .{
+            .required_features = .{ .@"64bit", null, null, null }, // Case gated on the 64bit feature.
+            .src_constraints = .{ .{ .remainder_signed_int = .{ .of = .xword, .is = .xword } }, .any, .any }, // src0: `remainder_signed_int` (of xword, is xword) — presumably the significant bits reach into the final qword; confirm against the constraint definition.
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } }, // Only a mutable-memory src0 is accepted.
+            },
+            .extra_temps = .{
+                .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp0: one 64-bit general-purpose scratch.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result aliases src0 (modified in place).
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0q, .memad(.src0q, .add_size, -8), ._, ._ }, // Load the qword at displacement -8 from the `.add_size` adjusted address of src0.
+                .{ ._, ._l, .sa, .tmp0q, .uia(64, .src0, .sub_bit_size_rem_64), ._, ._ }, // `_l` + `sa`: presumably `sal` by 64 adjusted by `.sub_bit_size_rem_64`.
+                .{ ._, ._r, .sa, .tmp0q, .uia(64, .src0, .sub_bit_size_rem_64), ._, ._ }, // `_r` + `sa`: presumably `sar` by the same amount, sign-extending the value.
+                .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ }, // Store the result back at displacement -8.
+            } },
+        }, .{
+            .required_features = .{ .@"64bit", null, null, null }, // Case gated on the 64bit feature.
+            .src_constraints = .{ .{ .remainder_unsigned_int = .{ .of = .xword, .is = .xword } }, .any, .any }, // src0: `remainder_unsigned_int` (of xword, is xword) — presumably the unsigned counterpart of the signed xword/xword case; confirm.
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } }, // Only a mutable-memory src0 is accepted.
+            },
+            .extra_temps = .{
+                .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp0: one 64-bit general-purpose scratch.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result aliases src0 (modified in place).
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, // tmp0 = immediate built from src0's `.add_umax` adjustment (presumably the mask of valid bits).
+                .{ ._, ._, .@"and", .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ }, // AND tmp0 into the qword at displacement -8.
+            } },
+        }, .{
+            .required_features = .{ .sse, null, null, null }, // Case gated on the sse feature.
+            .src_constraints = .{ .{ .scalar_exact_int = .{ .of = .xword, .is = 8 } }, .any, .any }, // src0: `scalar_exact_int` (of xword, is 8) — presumably an xword vector whose elements are exactly 8 bits; confirm.
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_sse, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result aliases src0.
+            .each = .{ .once = &.{} }, // Empty instruction list: nothing is emitted for this case.
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // Case gated on the avx feature.
+            .src_constraints = .{ .{ .scalar_exact_int = .{ .of = .yword, .is = 8 } }, .any, .any }, // src0: `scalar_exact_int` (of yword, is 8) — presumably a yword vector whose elements are exactly 8 bits; confirm.
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_sse, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result aliases src0.
+            .each = .{ .once = &.{} }, // Empty instruction list: nothing is emitted for this case.
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // Case gated on the avx feature.
+            .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } }, .any, .any }, // src0: `scalar_signed_int` (of xword, is byte) — presumably an xword vector of signed ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_sse, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: pointer-sized general-purpose scratch used as an address register below.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp1: `umax_mem` constant derived from dst0, converted to unsigned (presumably the per-element bit mask).
+                .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } }, // tmp2: `smin_mem` constant derived from dst0 (presumably the per-element signed minimum).
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, // The result is an sse-class register related to src0 via `mut_rc`.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant.
+                .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ }, // dst0 = src0 AND [tmp0] (`vp_` + `and`, presumably vpand).
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp2), ._, ._ }, // tmp0 = address of the tmp2 constant.
+                .{ ._, .vp_b, .add, .dst0x, .dst0x, .lea(.tmp0x), ._ }, // dst0 += [tmp0] per byte (`vp_b` + `add`, presumably vpaddb).
+                .{ ._, .vp_, .xor, .dst0x, .dst0x, .lea(.tmp0x), ._ }, // dst0 ^= [tmp0]; together with the add this presumably sign-extends each element.
+            } },
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // Case gated on the avx feature.
+            .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } }, .any, .any }, // src0: `scalar_unsigned_int` (of xword, is byte) — presumably an xword vector of unsigned ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_sse, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: pointer-sized general-purpose scratch used as an address register below.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: `umax_mem` constant derived from dst0 (presumably the per-element bit mask).
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, // The result is an sse-class register related to src0 via `mut_rc`.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant.
+                .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ }, // dst0 = src0 AND [tmp0] (`vp_` + `and`, presumably vpand).
+            } },
+        }, .{
+            .required_features = .{ .sse2, null, null, null }, // Case gated on the sse2 feature.
+            .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } }, .any, .any }, // src0: `scalar_signed_int` (of xword, is byte) — presumably an xword vector of signed ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mut_sse, .none, .none } }, // src0 must be a mutable sse operand because the two-operand instructions below overwrite it.
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: pointer-sized general-purpose scratch used as an address register below.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp1: `umax_mem` constant derived from dst0, converted to unsigned (presumably the per-element bit mask).
+                .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } }, // tmp2: `smin_mem` constant derived from dst0 (presumably the per-element signed minimum).
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result aliases src0.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant.
+                .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, // dst0 &= [tmp0] (`p_` + `and`, presumably pand).
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp2), ._, ._ }, // tmp0 = address of the tmp2 constant.
+                .{ ._, .p_b, .add, .dst0x, .lea(.tmp0x), ._, ._ }, // dst0 += [tmp0] per byte (`p_b` + `add`, presumably paddb).
+                .{ ._, .p_, .xor, .dst0x, .lea(.tmp0x), ._, ._ }, // dst0 ^= [tmp0]; together with the add this presumably sign-extends each element.
+            } },
+        }, .{
+            .required_features = .{ .sse2, null, null, null }, // Case gated on the sse2 feature.
+            .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } }, .any, .any }, // src0: `scalar_unsigned_int` (of xword, is byte) — presumably an xword vector of unsigned ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mut_sse, .none, .none } }, // src0 must be a mutable sse operand because the instruction below overwrites it.
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: pointer-sized general-purpose scratch used as an address register below.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: `umax_mem` constant derived from dst0 (presumably the per-element bit mask).
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result aliases src0.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant.
+                .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, // dst0 &= [tmp0] (`p_` + `and`, presumably pand).
+            } },
+        }, .{
+            .required_features = .{ .sse, null, null, null }, // Case gated on the sse feature only; same constraint as the sse2 unsigned xword/byte case.
+            .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } }, .any, .any }, // src0: `scalar_unsigned_int` (of xword, is byte) — presumably an xword vector of unsigned ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mut_sse, .none, .none } }, // src0 must be a mutable sse operand because the instruction below overwrites it.
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: pointer-sized general-purpose scratch used as an address register below.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: `umax_mem` constant derived from dst0 (presumably the per-element bit mask).
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result aliases src0.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant.
+                .{ ._, ._ps, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, // dst0 &= [tmp0] using the `_ps` form (presumably andps) instead of the `p_` integer form.
+            } },
+        }, .{
+            .required_features = .{ .avx2, null, null, null }, // Case gated on the avx2 feature.
+            .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .yword, .is = .byte } }, .any, .any }, // src0: `scalar_signed_int` (of yword, is byte) — presumably a yword vector of signed ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_sse, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: pointer-sized general-purpose scratch used as an address register below.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp1: `umax_mem` constant derived from dst0, converted to unsigned (presumably the per-element bit mask).
+                .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } }, // tmp2: `smin_mem` constant derived from dst0 (presumably the per-element signed minimum).
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, // The result is an sse-class register related to src0 via `mut_rc`.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant.
+                .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, // dst0 = src0 AND [tmp0] on the y-sized operands (`vp_` + `and`, presumably vpand).
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp2), ._, ._ }, // tmp0 = address of the tmp2 constant.
+                .{ ._, .vp_b, .add, .dst0y, .dst0y, .lea(.tmp0y), ._ }, // dst0 += [tmp0] per byte (`vp_b` + `add`, presumably vpaddb).
+                .{ ._, .vp_, .xor, .dst0y, .dst0y, .lea(.tmp0y), ._ }, // dst0 ^= [tmp0]; together with the add this presumably sign-extends each element.
+            } },
+        }, .{
+            .required_features = .{ .avx2, null, null, null }, // Case gated on the avx2 feature.
+            .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .yword, .is = .byte } }, .any, .any }, // src0: `scalar_unsigned_int` (of yword, is byte) — presumably a yword vector of unsigned ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_sse, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: pointer-sized general-purpose scratch used as an address register below.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: `umax_mem` constant derived from dst0 (presumably the per-element bit mask).
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, // The result is an sse-class register related to src0 via `mut_rc`.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant.
+                .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, // dst0 = src0 AND [tmp0] on the y-sized operands (`vp_` + `and`, presumably vpand).
+            } },
+        }, .{
+            .src_constraints = .{ .{ .multiple_scalar_exact_int = .{ .of = .byte, .is = 8 } }, .any, .any }, // Case: src0 matches `multiple_scalar_exact_int` (of byte, is 8) — presumably a vector of any length whose elements are exactly 8 bits; confirm. No feature requirement.
+            .patterns = &.{
+                .{ .src = .{ .to_mut_mem, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result aliases src0.
+            .each = .{ .once = &.{} }, // Empty instruction list: nothing is emitted for this case.
+        }, .{
+            .required_features = .{ .avx2, null, null, null }, // Case gated on the avx2 feature; processes memory in a loop of 32-byte steps.
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .byte } }, .any, .any }, // src0: `multiple_scalar_signed_int` (of yword, is byte) — presumably a vector spanning multiple ywords of signed ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: general-purpose scratch, first an address register and then the loop index.
+                .{ .type = .vector_32_i8, .kind = .{ .rc = .sse } }, // tmp1: register copy of the tmp4 constant.
+                .{ .type = .vector_32_i8, .kind = .{ .rc = .sse } }, // tmp2: register copy of the tmp5 constant.
+                .{ .type = .vector_32_i8, .kind = .{ .rc = .sse } }, // tmp3: per-iteration working register.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp4: `umax_mem` constant (presumably the per-element bit mask).
+                .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } }, // tmp5: `smin_mem` constant (presumably the per-element signed minimum).
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused }, // The result is written to a separate memory destination.
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp4), ._, ._ }, // tmp0 = address of the tmp4 constant.
+                .{ ._, .v_dqa, .mov, .tmp1y, .lea(.tmp0y), ._, ._ }, // tmp1 = [tmp0] (`v_dqa` + `mov`, presumably vmovdqa).
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp5), ._, ._ }, // tmp0 = address of the tmp5 constant.
+                .{ ._, .v_dqa, .mov, .tmp2y, .lea(.tmp0y), ._, ._ }, // tmp2 = [tmp0].
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // Loop index = dst0's `.sub_unaligned_size` value — presumably the negated byte size, so the index counts up toward zero; confirm.
+                .{ .@"0:", .vp_, .@"and", .tmp3y, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._ }, // Label 0: tmp3 = tmp1 AND the current 32-byte chunk of src0.
+                .{ ._, .vp_b, .add, .tmp3y, .tmp3y, .tmp2y, ._ }, // tmp3 += tmp2 per byte.
+                .{ ._, .vp_, .xor, .tmp3y, .tmp3y, .tmp2y, ._ }, // tmp3 ^= tmp2; together with the add this presumably sign-extends each element.
+                .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp3y, ._, ._ }, // Store tmp3 to the matching chunk of dst0.
+                .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, // Advance the index by 32 bytes.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add produced no carry (`_nc` + `j`, presumably jnc).
+            } },
+        }, .{
+            .required_features = .{ .avx2, null, null, null }, // Case gated on the avx2 feature; processes memory in a loop of 32-byte steps.
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .byte } }, .any, .any }, // src0: `multiple_scalar_unsigned_int` (of yword, is byte) — presumably a vector spanning multiple ywords of unsigned ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: general-purpose scratch, first an address register and then the loop index.
+                .{ .type = .vector_32_u8, .kind = .{ .rc = .sse } }, // tmp1: register copy of the tmp3 constant.
+                .{ .type = .vector_32_u8, .kind = .{ .rc = .sse } }, // tmp2: per-iteration working register.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: `umax_mem` constant (presumably the per-element bit mask).
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused }, // The result is written to a separate memory destination.
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of the tmp3 constant.
+                .{ ._, .v_dqa, .mov, .tmp1y, .lea(.tmp0y), ._, ._ }, // tmp1 = [tmp0] (`v_dqa` + `mov`, presumably vmovdqa).
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // Loop index = dst0's `.sub_unaligned_size` value — presumably the negated byte size; confirm.
+                .{ .@"0:", .vp_, .@"and", .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._ }, // Label 0: tmp2 = tmp1 AND the current 32-byte chunk of src0.
+                .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp2y, ._, ._ }, // Store tmp2 to the matching chunk of dst0.
+                .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, // Advance the index by 32 bytes.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add produced no carry.
+            } },
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // Case gated on the avx feature; processes memory in a loop of 16-byte steps.
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } }, .any, .any }, // src0: `multiple_scalar_signed_int` (of xword, is byte) — presumably a vector spanning multiple xwords of signed ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: general-purpose scratch, first an address register and then the loop index.
+                .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } }, // tmp1: register copy of the tmp4 constant.
+                .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } }, // tmp2: register copy of the tmp5 constant.
+                .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } }, // tmp3: per-iteration working register.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp4: `umax_mem` constant (presumably the per-element bit mask).
+                .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } }, // tmp5: `smin_mem` constant (presumably the per-element signed minimum).
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused }, // The result is written to a separate memory destination.
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp4), ._, ._ }, // tmp0 = address of the tmp4 constant.
+                .{ ._, .v_dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ }, // tmp1 = [tmp0] (`v_dqa` + `mov`, presumably vmovdqa).
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp5), ._, ._ }, // tmp0 = address of the tmp5 constant.
+                .{ ._, .v_dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ }, // tmp2 = [tmp0].
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // Loop index = dst0's `.sub_unaligned_size` value — presumably the negated byte size; confirm.
+                .{ .@"0:", .vp_, .@"and", .tmp3x, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._ }, // Label 0: tmp3 = tmp1 AND the current 16-byte chunk of src0.
+                .{ ._, .vp_b, .add, .tmp3x, .tmp3x, .tmp2x, ._ }, // tmp3 += tmp2 per byte.
+                .{ ._, .vp_, .xor, .tmp3x, .tmp3x, .tmp2x, ._ }, // tmp3 ^= tmp2; together with the add this presumably sign-extends each element.
+                .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp3x, ._, ._ }, // Store tmp3 to the matching chunk of dst0.
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // Advance the index by 16 bytes.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add produced no carry.
+            } },
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // Case gated on the avx feature; processes memory in a loop of 16-byte steps.
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } }, .any, .any }, // src0: `multiple_scalar_unsigned_int` (of xword, is byte) — presumably a vector spanning multiple xwords of unsigned ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: general-purpose scratch, first an address register and then the loop index.
+                .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, // tmp1: register copy of the tmp3 constant.
+                .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, // tmp2: per-iteration working register.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: `umax_mem` constant (presumably the per-element bit mask).
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused }, // The result is written to a separate memory destination.
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of the tmp3 constant.
+                .{ ._, .v_dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ }, // tmp1 = [tmp0] (`v_dqa` + `mov`, presumably vmovdqa).
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // Loop index = dst0's `.sub_unaligned_size` value — presumably the negated byte size; confirm.
+                .{ .@"0:", .vp_, .@"and", .tmp2x, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._ }, // Label 0: tmp2 = tmp1 AND the current 16-byte chunk of src0.
+                .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ }, // Store tmp2 to the matching chunk of dst0.
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // Advance the index by 16 bytes.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add produced no carry.
+            } },
+        }, .{
+            .required_features = .{ .sse2, null, null, null }, // Case gated on the sse2 feature; same loop as the avx xword case but with two-operand instructions.
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } }, .any, .any }, // src0: `multiple_scalar_signed_int` (of xword, is byte) — presumably a vector spanning multiple xwords of signed ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: general-purpose scratch, first an address register and then the loop index.
+                .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } }, // tmp1: register copy of the tmp4 constant.
+                .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } }, // tmp2: register copy of the tmp5 constant.
+                .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } }, // tmp3: per-iteration working register.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp4: `umax_mem` constant (presumably the per-element bit mask).
+                .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } }, // tmp5: `smin_mem` constant (presumably the per-element signed minimum).
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused }, // The result is written to a separate memory destination.
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp4), ._, ._ }, // tmp0 = address of the tmp4 constant.
+                .{ ._, ._dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ }, // tmp1 = [tmp0] (`_dqa` + `mov`, presumably movdqa).
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp5), ._, ._ }, // tmp0 = address of the tmp5 constant.
+                .{ ._, ._dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ }, // tmp2 = [tmp0].
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // Loop index = dst0's `.sub_unaligned_size` value — presumably the negated byte size; confirm.
+                .{ .@"0:", ._dqa, .mov, .tmp3x, .tmp1x, ._, ._ }, // Label 0: copy tmp1 into tmp3 first, since the two-operand AND below overwrites its destination.
+                .{ ._, .p_, .@"and", .tmp3x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, // tmp3 &= the current 16-byte chunk of src0.
+                .{ ._, .p_b, .add, .tmp3x, .tmp2x, ._, ._ }, // tmp3 += tmp2 per byte.
+                .{ ._, .p_, .xor, .tmp3x, .tmp2x, ._, ._ }, // tmp3 ^= tmp2; together with the add this presumably sign-extends each element.
+                .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp3x, ._, ._ }, // Store tmp3 to the matching chunk of dst0.
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // Advance the index by 16 bytes.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add produced no carry.
+            } },
+        }, .{
+            .required_features = .{ .sse2, null, null, null }, // Case gated on the sse2 feature; processes memory in a loop of 16-byte steps.
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } }, .any, .any }, // src0: `multiple_scalar_unsigned_int` (of xword, is byte) — presumably a vector spanning multiple xwords of unsigned ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: general-purpose scratch, first an address register and then the loop index.
+                .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, // tmp1: register copy of the tmp3 constant.
+                .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, // tmp2: per-iteration working register.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: `umax_mem` constant (presumably the per-element bit mask).
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused }, // The result is written to a separate memory destination.
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of the tmp3 constant.
+                .{ ._, ._dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ }, // tmp1 = [tmp0] (`_dqa` + `mov`, presumably movdqa).
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // Loop index = dst0's `.sub_unaligned_size` value — presumably the negated byte size; confirm.
+                .{ .@"0:", ._dqa, .mov, .tmp2x, .tmp1x, ._, ._ }, // Label 0: copy tmp1 into tmp2 first, since the two-operand AND below overwrites its destination.
+                .{ ._, .p_, .@"and", .tmp2x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, // tmp2 &= the current 16-byte chunk of src0.
+                .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ }, // Store tmp2 to the matching chunk of dst0.
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // Advance the index by 16 bytes.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add produced no carry.
+            } },
+        }, .{
+            .required_features = .{ .sse, null, null, null }, // Case gated on the sse feature only; same loop as the sse2 unsigned case but with `_ps` instruction forms.
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } }, .any, .any }, // src0: `multiple_scalar_unsigned_int` (of xword, is byte) — presumably a vector spanning multiple xwords of unsigned ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: general-purpose scratch, first an address register and then the loop index.
+                .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, // tmp1: register copy of the tmp3 constant.
+                .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, // tmp2: per-iteration working register.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: `umax_mem` constant (presumably the per-element bit mask).
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused }, // The result is written to a separate memory destination.
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of the tmp3 constant.
+                .{ ._, ._ps, .mova, .tmp1x, .lea(.tmp0x), ._, ._ }, // tmp1 = [tmp0] (`_ps` + `mova`, presumably movaps).
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // Loop index = dst0's `.sub_unaligned_size` value — presumably the negated byte size; confirm.
+                .{ .@"0:", ._ps, .mova, .tmp2x, .tmp1x, ._, ._ }, // Label 0: copy tmp1 into tmp2 first, since the two-operand AND below overwrites its destination.
+                .{ ._, ._ps, .@"and", .tmp2x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, // tmp2 &= the current 16-byte chunk of src0 (`_ps` + `and`, presumably andps).
+                .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ }, // Store tmp2 to the matching chunk of dst0.
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // Advance the index by 16 bytes.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add produced no carry.
+            } },
+        }, .{
+            .required_features = .{ .slow_incdec, null, null, null }, // Selected only when the `slow_incdec` feature is present; the loop counter uses `add 1` rather than the inc form used by the sibling case without this feature.
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } }, .any, .any }, // src0: `multiple_scalar_signed_int` (of byte, is byte) — presumably any vector of signed ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index.
+                .{ .type = .i8, .kind = .{ .rc = .general_purpose } }, // tmp1: the element being processed.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused }, // The result is written to a separate memory destination.
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // Loop index = dst0's `.sub_unaligned_size` value — presumably the negated byte size; confirm.
+                .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ }, // Label 0: load the current byte of src0, sign-extended into the dword form of tmp1.
+                .{ ._, ._l, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // `_l` + `sa`: presumably `sal` of the byte by 8 adjusted by `.sub_bit_size`.
+                .{ ._, ._r, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // `_r` + `sa`: presumably `sar` by the same amount, sign-extending the element.
+                .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // Store the byte to the matching position in dst0.
+                .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // Advance the index by one byte.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add produced no carry.
+            } },
+        }, .{
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } }, .any, .any }, // Case without feature requirements: src0 matches `multiple_scalar_signed_int` (of byte, is byte) — presumably any vector of signed ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index.
+                .{ .type = .i8, .kind = .{ .rc = .general_purpose } }, // tmp1: the element being processed.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused }, // The result is written to a separate memory destination.
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // Loop index = dst0's `.sub_unaligned_size` value — presumably the negated byte size; confirm.
+                .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ }, // Label 0: load the current byte of src0, sign-extended into the dword form of tmp1.
+                .{ ._, ._l, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // `_l` + `sa`: presumably `sal` of the byte by 8 adjusted by `.sub_bit_size`.
+                .{ ._, ._r, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // `_r` + `sa`: presumably `sar` by the same amount, sign-extending the element.
+                .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // Store the byte to the matching position in dst0.
+                .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // Increment the index (`in` + `_c`, presumably inc).
+                .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the index is non-zero (`_nz` + `j`, presumably jnz).
+            } },
+        }, .{
+            .required_features = .{ .slow_incdec, null, null, null }, // Selected only when the `slow_incdec` feature is present; the loop counter uses `add 1` rather than the inc form used by the sibling case without this feature.
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }, .any, .any }, // src0: `multiple_scalar_unsigned_int` (of byte, is byte) — presumably any vector of unsigned ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index.
+                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: the element being processed.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused }, // The result is written to a separate memory destination.
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // Loop index = dst0's `.sub_unaligned_size` value — presumably the negated byte size; confirm.
+                .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ }, // Label 0: load the current byte of src0, zero-extended into the dword form of tmp1.
+                .{ ._, ._, .@"and", .tmp1b, .sa(.dst0, .add_umax), ._, ._ }, // Mask the byte with the immediate built from dst0's `.add_umax` adjustment.
+                .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // Store the byte to the matching position in dst0.
+                .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // Advance the index by one byte.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add produced no carry.
+            } },
+        }, .{
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }, .any, .any }, // Case without feature requirements: src0 matches `multiple_scalar_unsigned_int` (of byte, is byte) — presumably any vector of unsigned ints stored one per byte; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index.
+                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: the element being processed.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused }, // The result is written to a separate memory destination.
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // Loop index = dst0's `.sub_unaligned_size` value — presumably the negated byte size; confirm.
+                .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ }, // Label 0: load the current byte of src0, zero-extended into the dword form of tmp1.
+                .{ ._, ._, .@"and", .tmp1b, .sa(.dst0, .add_umax), ._, ._ }, // Mask the byte with the immediate built from dst0's `.add_umax` adjustment.
+                .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // Store the byte to the matching position in dst0.
+                .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // Increment the index (`in` + `_c`, presumably inc).
+                .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the index is non-zero (`_nz` + `j`, presumably jnz).
+            } },
+        }, .{
+            .required_features = .{ .sse, null, null, null }, // Case gated on the sse feature.
+            .src_constraints = .{ .{ .scalar_exact_int = .{ .of = .xword, .is = 16 } }, .any, .any }, // src0: `scalar_exact_int` (of xword, is 16) — presumably an xword vector whose elements are exactly 16 bits; confirm.
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_sse, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result aliases src0.
+            .each = .{ .once = &.{} }, // Empty instruction list: nothing is emitted for this case.
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // Case gated on the avx feature.
+            .src_constraints = .{ .{ .scalar_exact_int = .{ .of = .yword, .is = 16 } }, .any, .any }, // src0: `scalar_exact_int` (of yword, is 16) — presumably a yword vector whose elements are exactly 16 bits; confirm.
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_sse, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result aliases src0.
+            .each = .{ .once = &.{} }, // Empty instruction list: nothing is emitted for this case.
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // Case gated on the avx feature.
+            .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .word } }, .any, .any }, // src0: `scalar_signed_int` (of xword, is word) — presumably an xword vector of signed ints stored one per word; confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_sse, .none, .none } },
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, // The result is an sse-class register related to src0 via `mut_rc`; no extra temps are needed.
+            .each = .{ .once = &.{
+                .{ ._, .vp_w, .sll, .dst0x, .src0x, .uia(16, .dst0, .sub_bit_size), ._ }, // dst0 = src0 shifted left per word by 16 adjusted by `.sub_bit_size` (`vp_w` + `sll`, presumably vpsllw).
+                .{ ._, .vp_w, .sra, .dst0x, .dst0x, .uia(16, .dst0, .sub_bit_size), ._ }, // Arithmetic right shift per word by the same amount (presumably vpsraw), sign-extending each element.
+            } },
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // AVX entry: unsigned ints, `.of = .xword`, `.is = .word`
+            .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_sse, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: general-purpose register holding an address
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: `umax_mem` constant tied to dst0; presumably its max-value mask - TODO confirm
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of tmp1
+                .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ }, // dst0 = src0 AND the xword at [tmp0]
+            } },
+        }, .{
+            .required_features = .{ .sse2, null, null, null }, // SSE2 entry: signed ints, `.of = .xword`, `.is = .word`
+            .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .word } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mut_sse, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // dst0 refers to src0, so the shifts below work in place
+            .each = .{ .once = &.{
+                .{ ._, .p_w, .sll, .dst0x, .uia(16, .dst0, .sub_bit_size), ._, ._ }, // in-place left shift, word-lane form
+                .{ ._, .p_w, .sra, .dst0x, .uia(16, .dst0, .sub_bit_size), ._, ._ }, // arithmetic shift back by the same immediate; presumably sign-extends from dst0's bit size - TODO confirm
+            } },
+        }, .{
+            .required_features = .{ .sse2, null, null, null }, // SSE2 entry: unsigned ints, `.of = .xword`, `.is = .word`
+            .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mut_sse, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: general-purpose register holding an address
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: `umax_mem` constant tied to dst0; presumably its max-value mask - TODO confirm
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // dst0 refers to src0
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of tmp1
+                .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, // dst0 &= the xword at [tmp0]
+            } },
+        }, .{
+            .required_features = .{ .sse, null, null, null }, // SSE-only entry: unsigned ints, `.of = .xword`, `.is = .word`
+            .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mut_sse, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: general-purpose register holding an address
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: `umax_mem` constant tied to dst0; presumably its max-value mask - TODO confirm
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // dst0 refers to src0
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of tmp1
+                .{ ._, ._ps, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, // dst0 &= the xword at [tmp0], using the `_ps` form of `and`
+            } },
+        }, .{
+            .required_features = .{ .avx2, null, null, null }, // AVX2 entry: signed ints, `.of = .yword`, `.is = .word`
+            .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .yword, .is = .word } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_sse, .none, .none } },
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+            .each = .{ .once = &.{
+                .{ ._, .vp_w, .sll, .dst0y, .src0y, .uia(16, .dst0, .sub_bit_size), ._ }, // dst0 = src0 shifted left, word-lane form on ymm
+                .{ ._, .vp_w, .sra, .dst0y, .dst0y, .uia(16, .dst0, .sub_bit_size), ._ }, // arithmetic shift back by the same immediate; presumably sign-extends from dst0's bit size - TODO confirm
+            } },
+        }, .{
+            .required_features = .{ .avx2, null, null, null }, // AVX2 entry: unsigned ints, `.of = .yword`, `.is = .word`
+            .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .yword, .is = .word } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_sse, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: general-purpose register holding an address
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: `umax_mem` constant tied to dst0; presumably its max-value mask - TODO confirm
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of tmp1
+                .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, // dst0 = src0 AND the yword at [tmp0]
+            } },
+        }, .{
+            .src_constraints = .{ .{ .multiple_scalar_exact_int = .{ .of = .word, .is = 16 } }, .any, .any }, // entry with no `required_features` for exactly-16-bit ints
+            .patterns = &.{
+                .{ .src = .{ .to_mut_mem, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // dst0 refers to src0
+            .each = .{ .once = &.{} }, // no instructions; NOTE(review): presumably full-width elements need no fix-up - TODO confirm
+        }, .{
+            .required_features = .{ .avx2, null, null, null }, // AVX2 loop entry: signed ints, `.of = .yword`, `.is = .word`, operand in memory
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .word } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset
+                .{ .type = .vector_16_i16, .kind = .{ .rc = .sse } }, // tmp1: chunk being processed
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // the `add`/`jnc` pair below uses the flags
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start offset; presumably minus dst0's unaligned size - TODO confirm
+                .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._, ._ }, // loop head: load one ymm chunk of src0 at offset tmp0
+                .{ ._, .vp_w, .sll, .tmp1y, .tmp1y, .uia(16, .dst0, .sub_bit_size), ._ }, // left shift, word-lane form
+                .{ ._, .vp_w, .sra, .tmp1y, .tmp1y, .uia(16, .dst0, .sub_bit_size), ._ }, // arithmetic shift back by the same immediate
+                .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp1y, ._, ._ }, // store the chunk to dst0 at the same offset
+                .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, // advance the offset by 32
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to `0:` while the add produced no carry
+            } },
+        }, .{
+            .required_features = .{ .avx2, null, null, null }, // AVX2 loop entry: unsigned ints, `.of = .yword`, `.is = .word`, operand in memory
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .word } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first an address, then the loop offset
+                .{ .type = .vector_16_u16, .kind = .{ .rc = .sse } }, // tmp1: value loaded once from tmp3 before the loop
+                .{ .type = .vector_16_u16, .kind = .{ .rc = .sse } }, // tmp2: per-iteration result
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: `umax_mem` constant tied to dst0; presumably its max-value mask - TODO confirm
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // the `add`/`jnc` pair below uses the flags
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of tmp3
+                .{ ._, .v_dqa, .mov, .tmp1y, .lea(.tmp0y), ._, ._ }, // tmp1 = the yword at [tmp0]
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 reused as start offset; presumably minus dst0's unaligned size - TODO confirm
+                .{ .@"0:", .vp_, .@"and", .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._ }, // loop head: tmp2 = tmp1 AND the src0 chunk at offset tmp0
+                .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp2y, ._, ._ }, // store the result to dst0 at the same offset
+                .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, // advance the offset by 32
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to `0:` while the add produced no carry
+            } },
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // AVX loop entry: signed ints, `.of = .xword`, `.is = .word`, operand in memory
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset
+                .{ .type = .vector_8_i16, .kind = .{ .rc = .sse } }, // tmp1: chunk being processed
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // the `add`/`jnc` pair below uses the flags
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start offset; presumably minus dst0's unaligned size - TODO confirm
+                .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, // loop head: load one xmm chunk of src0 at offset tmp0
+                .{ ._, .vp_w, .sll, .tmp1x, .tmp1x, .uia(16, .dst0, .sub_bit_size), ._ }, // left shift, word-lane form
+                .{ ._, .vp_w, .sra, .tmp1x, .tmp1x, .uia(16, .dst0, .sub_bit_size), ._ }, // arithmetic shift back by the same immediate
+                .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, // store the chunk to dst0 at the same offset
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance the offset by 16
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to `0:` while the add produced no carry
+            } },
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // AVX loop entry: unsigned ints, `.of = .xword`, `.is = .word`, operand in memory
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first an address, then the loop offset
+                .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, // tmp1: value loaded once from tmp3 before the loop
+                .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, // tmp2: per-iteration result
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: `umax_mem` constant tied to dst0; presumably its max-value mask - TODO confirm
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // the `add`/`jnc` pair below uses the flags
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of tmp3
+                .{ ._, .v_dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ }, // tmp1 = the xword at [tmp0]
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 reused as start offset; presumably minus dst0's unaligned size - TODO confirm
+                .{ .@"0:", .vp_, .@"and", .tmp2x, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._ }, // loop head: tmp2 = tmp1 AND the src0 chunk at offset tmp0
+                .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ }, // store the result to dst0 at the same offset
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance the offset by 16
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to `0:` while the add produced no carry
+            } },
+        }, .{
+            .required_features = .{ .sse2, null, null, null }, // SSE2 loop entry: signed ints, `.of = .xword`, `.is = .word`, operand in memory
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset
+                .{ .type = .vector_8_i16, .kind = .{ .rc = .sse } }, // tmp1: chunk being processed
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // the `add`/`jnc` pair below uses the flags
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start offset; presumably minus dst0's unaligned size - TODO confirm
+                .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, // loop head: load one xmm chunk of src0 at offset tmp0
+                .{ ._, .p_w, .sll, .tmp1x, .uia(16, .dst0, .sub_bit_size), ._, ._ }, // in-place left shift, word-lane form
+                .{ ._, .p_w, .sra, .tmp1x, .uia(16, .dst0, .sub_bit_size), ._, ._ }, // arithmetic shift back by the same immediate
+                .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, // store the chunk to dst0 at the same offset
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance the offset by 16
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to `0:` while the add produced no carry
+            } },
+        }, .{
+            .required_features = .{ .sse2, null, null, null }, // SSE2 loop entry: unsigned ints, `.of = .xword`, `.is = .word`, operand in memory
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first an address, then the loop offset
+                .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, // tmp1: value loaded once from tmp3 before the loop
+                .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, // tmp2: per-iteration working copy
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: `umax_mem` constant tied to dst0; presumably its max-value mask - TODO confirm
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // the `add`/`jnc` pair below uses the flags
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of tmp3
+                .{ ._, ._dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ }, // tmp1 = the xword at [tmp0]
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 reused as start offset; presumably minus dst0's unaligned size - TODO confirm
+                .{ .@"0:", ._dqa, .mov, .tmp2x, .tmp1x, ._, ._ }, // loop head: copy tmp1 into tmp2, since the two-operand `and` overwrites its destination
+                .{ ._, .p_, .@"and", .tmp2x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, // tmp2 &= the src0 chunk at offset tmp0
+                .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ }, // store the result to dst0 at the same offset
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance the offset by 16
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to `0:` while the add produced no carry
+            } },
+        }, .{
+            .required_features = .{ .sse, null, null, null }, // SSE-only loop entry: unsigned ints, `.of = .xword`, `.is = .word`, operand in memory
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first an address, then the loop offset
+                .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, // tmp1: value loaded once from tmp3 before the loop
+                .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, // tmp2: per-iteration working copy
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: `umax_mem` constant tied to dst0; presumably its max-value mask - TODO confirm
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // the `add`/`jnc` pair below uses the flags
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of tmp3
+                .{ ._, ._ps, .mova, .tmp1x, .lea(.tmp0x), ._, ._ }, // tmp1 = the xword at [tmp0], using the `_ps` move
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 reused as start offset; presumably minus dst0's unaligned size - TODO confirm
+                .{ .@"0:", ._ps, .mova, .tmp2x, .tmp1x, ._, ._ }, // loop head: copy tmp1 into tmp2, since the two-operand `and` overwrites its destination
+                .{ ._, ._ps, .@"and", .tmp2x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, // tmp2 &= the src0 chunk at offset tmp0
+                .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ }, // store the result to dst0 at the same offset
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance the offset by 16
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to `0:` while the add produced no carry
+            } },
+        }, .{
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } }, .any, .any }, // entry with no `required_features`: signed ints handled one word at a time
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset
+                .{ .type = .i16, .kind = .{ .rc = .general_purpose } }, // tmp1: element being processed
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // shifts and the `add`/`jnc` pair below use the flags
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start offset; presumably minus dst0's unaligned size - TODO confirm
+                .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ }, // loop head: sign-extending load of one word of src0 into tmp1d
+                .{ ._, ._l, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ }, // shift left by the uia(32, dst0, sub_bit_size) immediate
+                .{ ._, ._r, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ }, // arithmetic shift right by the same immediate; presumably sign-extends from dst0's bit size - TODO confirm
+                .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ }, // store the low word to dst0 at the same offset
+                .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance the offset by 2
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to `0:` while the add produced no carry
+            } },
+        }, .{
+            .required_features = .{ .fast_imm16, null, null, null }, // `fast_imm16` entry: unsigned ints handled one word at a time with a 16-bit `and`
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset
+                .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, // tmp1: element being processed
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // `and` and the `add`/`jnc` pair below use the flags
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start offset; presumably minus dst0's unaligned size - TODO confirm
+                .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ }, // loop head: zero-extending load of one word of src0 into tmp1d
+                .{ ._, ._, .@"and", .tmp1w, .sa(.dst0, .add_umax), ._, ._ }, // 16-bit AND with sa(dst0, add_umax); presumably dst0's max-value mask - TODO confirm
+                .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ }, // store the word to dst0 at the same offset
+                .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance the offset by 2
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to `0:` while the add produced no carry
+            } },
+        }, .{
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }, .any, .any }, // entry with no `required_features`: unsigned ints handled one word at a time with a 32-bit `and`
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset
+                .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, // tmp1: element being processed
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // `and` and the `add`/`jnc` pair below use the flags
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start offset; presumably minus dst0's unaligned size - TODO confirm
+                .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ }, // loop head: zero-extending load of one word of src0 into tmp1d
+                .{ ._, ._, .@"and", .tmp1d, .sa(.dst0, .add_umax), ._, ._ }, // 32-bit AND with sa(dst0, add_umax); presumably dst0's max-value mask - TODO confirm
+                .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ }, // store the low word to dst0 at the same offset
+                .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance the offset by 2
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to `0:` while the add produced no carry
+            } },
+        }, .{
+            .required_features = .{ .sse, null, null, null }, // SSE entry for exactly-32-bit ints, `.of = .xword`
+            .src_constraints = .{ .{ .scalar_exact_int = .{ .of = .xword, .is = 32 } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_sse, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // dst0 refers to src0
+            .each = .{ .once = &.{} }, // no instructions; NOTE(review): presumably full-width lanes need no fix-up - TODO confirm
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // AVX entry for exactly-32-bit ints, `.of = .yword`
+            .src_constraints = .{ .{ .scalar_exact_int = .{ .of = .yword, .is = 32 } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_sse, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // dst0 refers to src0
+            .each = .{ .once = &.{} }, // no instructions; NOTE(review): presumably full-width lanes need no fix-up - TODO confirm
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // AVX entry: signed ints, `.of = .xword`, `.is = .dword`
+            .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_sse, .none, .none } },
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+            .each = .{ .once = &.{
+                .{ ._, .vp_d, .sll, .dst0x, .src0x, .uia(32, .dst0, .sub_bit_size), ._ }, // dst0 = src0 shifted left, dword-lane form
+                .{ ._, .vp_d, .sra, .dst0x, .dst0x, .uia(32, .dst0, .sub_bit_size), ._ }, // arithmetic shift back by the same immediate; presumably sign-extends from dst0's bit size - TODO confirm
+            } },
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // AVX entry: unsigned ints, `.of = .xword`, `.is = .dword`
+            .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_sse, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: general-purpose register holding an address
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: `umax_mem` constant tied to dst0; presumably its max-value mask - TODO confirm
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of tmp1
+                .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ }, // dst0 = src0 AND the xword at [tmp0]
+            } },
+        }, .{
+            .required_features = .{ .sse2, null, null, null }, // SSE2 entry: signed ints, `.of = .xword`, `.is = .dword`
+            .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mut_sse, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // dst0 refers to src0, so the shifts below work in place
+            .each = .{ .once = &.{
+                .{ ._, .p_d, .sll, .dst0x, .uia(32, .dst0, .sub_bit_size), ._, ._ }, // in-place left shift, dword-lane form
+                .{ ._, .p_d, .sra, .dst0x, .uia(32, .dst0, .sub_bit_size), ._, ._ }, // arithmetic shift back by the same immediate; presumably sign-extends from dst0's bit size - TODO confirm
+            } },
+        }, .{
+            .required_features = .{ .sse2, null, null, null }, // SSE2 entry: unsigned ints, `.of = .xword`, `.is = .dword`
+            .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mut_sse, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: general-purpose register holding an address
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: `umax_mem` constant tied to dst0; presumably its max-value mask - TODO confirm
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // dst0 refers to src0
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of tmp1
+                .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, // dst0 &= the xword at [tmp0]
+            } },
+        }, .{
+            .required_features = .{ .sse, null, null, null }, // SSE-only entry: unsigned ints, `.of = .xword`, `.is = .dword`
+            .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mut_sse, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: general-purpose register holding an address
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: `umax_mem` constant tied to dst0; presumably its max-value mask - TODO confirm
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // dst0 refers to src0
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of tmp1
+                .{ ._, ._ps, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, // dst0 &= the xword at [tmp0], using the `_ps` form of `and`
+            } },
+        }, .{
+            .required_features = .{ .avx2, null, null, null }, // AVX2 entry: signed ints, `.of = .yword`, `.is = .dword`
+            .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .yword, .is = .dword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_sse, .none, .none } },
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+            .each = .{ .once = &.{
+                .{ ._, .vp_d, .sll, .dst0y, .src0y, .uia(32, .dst0, .sub_bit_size), ._ }, // dst0 = src0 shifted left, dword-lane form on ymm
+                .{ ._, .vp_d, .sra, .dst0y, .dst0y, .uia(32, .dst0, .sub_bit_size), ._ }, // arithmetic shift back by the same immediate; presumably sign-extends from dst0's bit size - TODO confirm
+            } },
+        }, .{
+            .required_features = .{ .avx2, null, null, null }, // AVX2 entry: unsigned ints, `.of = .yword`, `.is = .dword`
+            .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .yword, .is = .dword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_sse, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: general-purpose register holding an address
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: `umax_mem` constant tied to dst0; presumably its max-value mask - TODO confirm
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of tmp1
+                .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, // dst0 = src0 AND the yword at [tmp0]
+            } },
+        }, .{
+            .required_features = .{ .avx2, null, null, null }, // AVX2 loop entry: signed ints, `.of = .yword`, `.is = .dword`, operand in memory
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .dword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset
+                .{ .type = .vector_8_i32, .kind = .{ .rc = .sse } }, // tmp1: chunk being processed
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // the `add`/`jnc` pair below uses the flags
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start offset; presumably minus dst0's unaligned size - TODO confirm
+                .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._, ._ }, // loop head: load one ymm chunk of src0 at offset tmp0
+                .{ ._, .vp_d, .sll, .tmp1y, .tmp1y, .uia(32, .dst0, .sub_bit_size), ._ }, // left shift, dword-lane form
+                .{ ._, .vp_d, .sra, .tmp1y, .tmp1y, .uia(32, .dst0, .sub_bit_size), ._ }, // arithmetic shift back by the same immediate
+                .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp1y, ._, ._ }, // store the chunk to dst0 at the same offset
+                .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, // advance the offset by 32
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to `0:` while the add produced no carry
+            } },
+        }, .{
+            .required_features = .{ .avx2, null, null, null }, // AVX2 loop entry: unsigned ints, `.of = .yword`, `.is = .dword`, operand in memory
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .dword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first an address, then the loop offset
+                .{ .type = .vector_8_u32, .kind = .{ .rc = .sse } }, // tmp1: value loaded once from tmp3 before the loop
+                .{ .type = .vector_8_u32, .kind = .{ .rc = .sse } }, // tmp2: per-iteration result
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: `umax_mem` constant tied to dst0; presumably its max-value mask - TODO confirm
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // the `add`/`jnc` pair below uses the flags
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of tmp3
+                .{ ._, .v_dqa, .mov, .tmp1y, .lea(.tmp0y), ._, ._ }, // tmp1 = the yword at [tmp0]
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 reused as start offset; presumably minus dst0's unaligned size - TODO confirm
+                .{ .@"0:", .vp_, .@"and", .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._ }, // loop head: tmp2 = tmp1 AND the src0 chunk at offset tmp0
+                .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp2y, ._, ._ }, // store the result to dst0 at the same offset
+                .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, // advance the offset by 32
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to `0:` while the add produced no carry
+            } },
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // AVX loop entry: signed ints, `.of = .xword`, `.is = .dword`, operand in memory
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset
+                .{ .type = .vector_4_i32, .kind = .{ .rc = .sse } }, // tmp1: chunk being processed
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // the `add`/`jnc` pair below uses the flags
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start offset; presumably minus dst0's unaligned size - TODO confirm
+                .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, // loop head: load one xmm chunk of src0 at offset tmp0
+                .{ ._, .vp_d, .sll, .tmp1x, .tmp1x, .uia(32, .dst0, .sub_bit_size), ._ }, // left shift, dword-lane form
+                .{ ._, .vp_d, .sra, .tmp1x, .tmp1x, .uia(32, .dst0, .sub_bit_size), ._ }, // arithmetic shift back by the same immediate
+                .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, // store the chunk to dst0 at the same offset
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance the offset by 16
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to `0:` while the add produced no carry
+            } },
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // AVX loop entry: unsigned ints, `.of = .xword`, `.is = .dword`, operand in memory
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first an address, then the loop offset
+                .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, // tmp1: value loaded once from tmp3 before the loop
+                .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, // tmp2: per-iteration result
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: `umax_mem` constant tied to dst0; presumably its max-value mask - TODO confirm
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // the `add`/`jnc` pair below uses the flags
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of tmp3
+                .{ ._, .v_dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ }, // tmp1 = the xword at [tmp0]
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 reused as start offset; presumably minus dst0's unaligned size - TODO confirm
+                .{ .@"0:", .vp_, .@"and", .tmp2x, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._ }, // loop head: tmp2 = tmp1 AND the src0 chunk at offset tmp0
+                .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ }, // store the result to dst0 at the same offset
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance the offset by 16
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to `0:` while the add produced no carry
+            } },
+        }, .{
+            .required_features = .{ .sse2, null, null, null }, // SSE2 entry: for every 16-byte chunk, shifts each dword left then arithmetic-right by the same count.
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } }, .any, .any }, // NOTE(review): presumably signed elements held in dwords, sized in xword multiples — confirm against the constraint definition.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset.
+                .{ .type = .vector_4_i32, .kind = .{ .rc = .sse } }, // tmp1: chunk being processed.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // The loop counter add below modifies flags.
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = loop offset; presumably the negated byte size so it counts up toward zero (TODO confirm).
+                .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, // Label 0: load the current source chunk.
+                .{ ._, .p_d, .sll, .tmp1x, .uia(32, .dst0, .sub_bit_size), ._, ._ }, // Shift each dword left; count is presumably 32 minus the element bit size (TODO confirm).
+                .{ ._, .p_d, .sra, .tmp1x, .uia(32, .dst0, .sub_bit_size), ._, ._ }, // Arithmetic shift back by the same count, replicating the new top bit (sign extension).
+                .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, // Store the chunk to the result.
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // Advance by one 16-byte chunk.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add did not carry.
+            } },
+        }, .{
+            .required_features = .{ .sse2, null, null, null }, // SSE2 entry: ANDs every 16-byte chunk of an in-memory source with a constant; two-operand form, so the constant is copied first.
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } }, .any, .any }, // NOTE(review): presumably unsigned elements held in dwords, sized in xword multiples — confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first an address scratch, then the loop offset.
+                .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, // tmp1: the constant, loaded once.
+                .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, // tmp2: per-iteration working copy.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: constant in memory; by its name the unsigned max of dst0's element type — TODO confirm.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // The loop counter add below modifies flags.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of the tmp3 constant.
+                .{ ._, ._dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ }, // Load the constant once, outside the loop.
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = loop offset; presumably the negated byte size (TODO confirm).
+                .{ .@"0:", ._dqa, .mov, .tmp2x, .tmp1x, ._, ._ }, // Label 0: copy the constant because the AND below overwrites its destination.
+                .{ ._, .p_, .@"and", .tmp2x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, // tmp2 &= current source chunk.
+                .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ }, // Store the chunk to the result.
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // Advance by one 16-byte chunk.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add did not carry.
+            } },
+        }, .{
+            .required_features = .{ .sse, null, null, null }, // SSE-only entry: same mask loop as the SSE2 form but built from the `_ps` move and AND.
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } }, .any, .any }, // NOTE(review): presumably unsigned elements held in dwords, sized in xword multiples — confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first an address scratch, then the loop offset.
+                .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, // tmp1: the constant, loaded once.
+                .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, // tmp2: per-iteration working copy.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: constant in memory; by its name the unsigned max of dst0's element type — TODO confirm.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // The loop counter add below modifies flags.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of the tmp3 constant.
+                .{ ._, ._ps, .mova, .tmp1x, .lea(.tmp0x), ._, ._ }, // Load the constant once, outside the loop.
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = loop offset; presumably the negated byte size (TODO confirm).
+                .{ .@"0:", ._ps, .mova, .tmp2x, .tmp1x, ._, ._ }, // Label 0: copy the constant because the AND below overwrites its destination.
+                .{ ._, ._ps, .@"and", .tmp2x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, // tmp2 &= current source chunk (bitwise, so the float-typed op is fine for integers).
+                .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ }, // Store the chunk to the result.
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // Advance by one 16-byte chunk.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add did not carry.
+            } },
+        }, .{
+            .src_constraints = .{ .{ .multiple_scalar_exact_int = .{ .of = .dword, .is = 32 } }, .any, .any }, // No-op entry; NOTE(review): presumably matches elements that are exactly 32 bits wide — confirm against the constraint definition.
+            .patterns = &.{
+                .{ .src = .{ .to_mut_mem, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result reuses src0 directly.
+            .each = .{ .once = &.{} }, // No instructions are emitted.
+        }, .{
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }, .any, .any }, // Feature-free entry: walks the elements one dword at a time through a general-purpose register, shifting left then arithmetic-right.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset.
+                .{ .type = .i32, .kind = .{ .rc = .general_purpose } }, // tmp1: element being processed.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // The shifts and the counter add modify flags.
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = loop offset; presumably the negated byte size so it counts up toward zero (TODO confirm).
+                .{ .@"0:", ._, .mov, .tmp1d, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ }, // Label 0: load the current element.
+                .{ ._, ._l, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ }, // Shift left; count is presumably 32 minus the element bit size (TODO confirm).
+                .{ ._, ._r, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ }, // Arithmetic shift back by the same count, replicating the new top bit (sign extension).
+                .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp1d, ._, ._ }, // Store the element to the result.
+                .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ }, // Advance by one 4-byte element.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add did not carry.
+            } },
+        }, .{
+            .required_features = .{ .bmi2, null, null, null }, // BMI2 entry: uses bzhi to clear the high bits of each dword element in a single instruction.
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset.
+                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: bit index operand for bzhi.
+                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp2: per-iteration result.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // bzhi and the counter add modify flags.
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = loop offset; presumably the negated byte size (TODO confirm).
+                .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ }, // tmp1 = immediate derived from dst0; presumably the element bit size (TODO confirm). Set once, outside the loop.
+                .{ .@"0:", ._, .bzhi, .tmp2d, .memia(.src0d, .tmp0, .add_unaligned_size), .tmp1d, ._ }, // Label 0: tmp2 = source element with bits at index tmp1 and above cleared.
+                .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp2d, ._, ._ }, // Store the element to the result.
+                .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ }, // Advance by one 4-byte element.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add did not carry.
+            } },
+        }, .{
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any, .any }, // Feature-free entry: ANDs each dword element with an immediate in a general-purpose register.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset.
+                .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: element being processed.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // The AND and the counter add modify flags.
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = loop offset; presumably the negated byte size (TODO confirm).
+                .{ .@"0:", ._, .mov, .tmp1d, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ }, // Label 0: load the current element.
+                .{ ._, ._, .@"and", .tmp1d, .sa(.dst0, .add_umax), ._, ._ }, // AND with an immediate; by its name the element type's unsigned max (TODO confirm).
+                .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp1d, ._, ._ }, // Store the element to the result.
+                .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ }, // Advance by one 4-byte element.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add did not carry.
+            } },
+        }, .{
+            .required_features = .{ .sse, null, null, null }, // No-op entry for an xword-sized operand held in memory or an SSE register.
+            .src_constraints = .{ .{ .scalar_exact_int = .{ .of = .xword, .is = 64 } }, .any, .any }, // NOTE(review): presumably matches elements that are exactly 64 bits wide — confirm against the constraint definition.
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_sse, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result reuses src0 directly.
+            .each = .{ .once = &.{} }, // No instructions are emitted.
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // No-op entry for a yword-sized operand held in memory or an SSE-class register.
+            .src_constraints = .{ .{ .scalar_exact_int = .{ .of = .yword, .is = 64 } }, .any, .any }, // NOTE(review): presumably matches elements that are exactly 64 bits wide — confirm against the constraint definition.
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_sse, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result reuses src0 directly.
+            .each = .{ .once = &.{} }, // No instructions are emitted.
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // AVX entry, straight-line: AND with one in-memory constant, then add and xor a second one, all on an xword register.
+            .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } }, .any, .any }, // NOTE(review): presumably signed elements held in qwords within one xword — confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_sse, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: address scratch for the constants.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp1: constant in memory; by its name an unsigned max derived from dst0 — TODO confirm.
+                .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } }, // tmp2: constant in memory; by its name a signed min derived from dst0 — TODO confirm.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant.
+                .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ }, // dst0 = src0 & constant.
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp2), ._, ._ }, // tmp0 = address of the tmp2 constant.
+                .{ ._, .vp_q, .add, .dst0x, .dst0x, .lea(.tmp0x), ._ }, // Add the tmp2 constant to each qword...
+                .{ ._, .vp_, .xor, .dst0x, .dst0x, .lea(.tmp0x), ._ }, // ...then xor with it. NOTE(review): looks like the (x + smin) ^ smin sign-extension idiom — confirm.
+            } },
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // AVX entry, straight-line: a single three-operand AND of an xword register with an in-memory constant.
+            .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } }, .any, .any }, // NOTE(review): presumably unsigned elements held in qwords within one xword — confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_sse, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: address scratch for the constant.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: constant in memory; by its name an unsigned max derived from dst0 — TODO confirm.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant.
+                .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ }, // dst0 = src0 & constant.
+            } },
+        }, .{
+            .required_features = .{ .sse2, null, null, null }, // SSE2 entry, straight-line and in place: AND with one in-memory constant, then add and xor a second one.
+            .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } }, .any, .any }, // NOTE(review): presumably signed elements held in qwords within one xword — confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mut_sse, .none, .none } }, // Mutable source register: the two-operand forms below overwrite it.
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: address scratch for the constants.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp1: constant in memory; by its name an unsigned max derived from dst0 — TODO confirm.
+                .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } }, // tmp2: constant in memory; by its name a signed min derived from dst0 — TODO confirm.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result reuses src0.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant.
+                .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, // dst0 &= constant.
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp2), ._, ._ }, // tmp0 = address of the tmp2 constant.
+                .{ ._, .p_q, .add, .dst0x, .lea(.tmp0x), ._, ._ }, // Add the tmp2 constant to each qword...
+                .{ ._, .p_, .xor, .dst0x, .lea(.tmp0x), ._, ._ }, // ...then xor with it. NOTE(review): looks like the (x + smin) ^ smin sign-extension idiom — confirm.
+            } },
+        }, .{
+            .required_features = .{ .sse2, null, null, null }, // SSE2 entry, straight-line and in place: a single AND of an xword register with an in-memory constant.
+            .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } }, .any, .any }, // NOTE(review): presumably unsigned elements held in qwords within one xword — confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mut_sse, .none, .none } }, // Mutable source register: the two-operand AND overwrites it.
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: address scratch for the constant.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: constant in memory; by its name an unsigned max derived from dst0 — TODO confirm.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result reuses src0.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant.
+                .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, // dst0 &= constant.
+            } },
+        }, .{
+            .required_features = .{ .sse, null, null, null }, // SSE-only entry: same in-place mask as the SSE2 form but using the `_ps` AND.
+            .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } }, .any, .any }, // NOTE(review): presumably unsigned elements held in qwords within one xword — confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mut_sse, .none, .none } }, // Mutable source register: the two-operand AND overwrites it.
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: address scratch for the constant.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: constant in memory; by its name an unsigned max derived from dst0 — TODO confirm.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result reuses src0.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant.
+                .{ ._, ._ps, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, // dst0 &= constant (bitwise, so the float-typed op is fine for integers).
+            } },
+        }, .{
+            .required_features = .{ .avx2, null, null, null }, // AVX2 entry, straight-line: the yword-register counterpart of the AND / add / xor sequence.
+            .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .yword, .is = .qword } }, .any, .any }, // NOTE(review): presumably signed elements held in qwords within one yword — confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_sse, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: address scratch for the constants.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp1: constant in memory; by its name an unsigned max derived from dst0 — TODO confirm.
+                .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } }, // tmp2: constant in memory; by its name a signed min derived from dst0 — TODO confirm.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant.
+                .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, // dst0 = src0 & constant.
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp2), ._, ._ }, // tmp0 = address of the tmp2 constant.
+                .{ ._, .vp_q, .add, .dst0y, .dst0y, .lea(.tmp0y), ._ }, // Add the tmp2 constant to each qword...
+                .{ ._, .vp_, .xor, .dst0y, .dst0y, .lea(.tmp0y), ._ }, // ...then xor with it. NOTE(review): looks like the (x + smin) ^ smin sign-extension idiom — confirm.
+            } },
+        }, .{
+            .required_features = .{ .avx2, null, null, null }, // AVX2 entry, straight-line: a single three-operand AND of a yword register with an in-memory constant.
+            .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .yword, .is = .qword } }, .any, .any }, // NOTE(review): presumably unsigned elements held in qwords within one yword — confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_sse, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: address scratch for the constant.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: constant in memory; by its name an unsigned max derived from dst0 — TODO confirm.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused },
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant.
+                .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, // dst0 = src0 & constant.
+            } },
+        }, .{
+            .required_features = .{ .avx2, null, null, null }, // AVX2 loop entry: applies the AND / add / xor sequence to every 32-byte chunk of an in-memory source.
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .qword } }, .any, .any }, // NOTE(review): presumably signed elements held in qwords, sized in yword multiples — confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first an address scratch, then the loop offset.
+                .{ .type = .vector_4_i64, .kind = .{ .rc = .sse } }, // tmp1: first constant (from tmp4), loaded once.
+                .{ .type = .vector_4_i64, .kind = .{ .rc = .sse } }, // tmp2: second constant (from tmp5), loaded once.
+                .{ .type = .vector_4_i64, .kind = .{ .rc = .sse } }, // tmp3: per-iteration result.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp4: constant in memory; by its name an unsigned max derived from dst0 — TODO confirm.
+                .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } }, // tmp5: constant in memory; by its name a signed min derived from dst0 — TODO confirm.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // The loop counter add below modifies flags.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp4), ._, ._ }, // tmp0 = address of the tmp4 constant.
+                .{ ._, .v_dqa, .mov, .tmp1y, .lea(.tmp0y), ._, ._ }, // Load it into tmp1 once.
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp5), ._, ._ }, // tmp0 = address of the tmp5 constant.
+                .{ ._, .v_dqa, .mov, .tmp2y, .lea(.tmp0y), ._, ._ }, // Load it into tmp2 once.
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = loop offset; presumably the negated byte size (TODO confirm).
+                .{ .@"0:", .vp_, .@"and", .tmp3y, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._ }, // Label 0: tmp3 = tmp1 & current source chunk.
+                .{ ._, .vp_q, .add, .tmp3y, .tmp3y, .tmp2y, ._ }, // Add tmp2 to each qword...
+                .{ ._, .vp_, .xor, .tmp3y, .tmp3y, .tmp2y, ._ }, // ...then xor with it. NOTE(review): looks like the (x + smin) ^ smin sign-extension idiom — confirm.
+                .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp3y, ._, ._ }, // Store the chunk to the result.
+                .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, // Advance by one 32-byte chunk.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add did not carry.
+            } },
+        }, .{
+            .required_features = .{ .avx2, null, null, null }, // AVX2 loop entry: ANDs every 32-byte chunk of an in-memory source with a constant.
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .qword } }, .any, .any }, // NOTE(review): presumably unsigned elements held in qwords, sized in yword multiples — confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first an address scratch, then the loop offset.
+                .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } }, // tmp1: the constant, loaded once.
+                .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } }, // tmp2: per-iteration result.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: constant in memory; by its name an unsigned max derived from dst0 — TODO confirm.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // The loop counter add below modifies flags.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of the tmp3 constant.
+                .{ ._, .v_dqa, .mov, .tmp1y, .lea(.tmp0y), ._, ._ }, // Load the constant once, outside the loop.
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = loop offset; presumably the negated byte size (TODO confirm).
+                .{ .@"0:", .vp_, .@"and", .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._ }, // Label 0: tmp2 = constant & current source chunk.
+                .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp2y, ._, ._ }, // Store the chunk to the result.
+                .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, // Advance by one 32-byte chunk.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add did not carry.
+            } },
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // AVX loop entry: applies the AND / add / xor sequence to every 16-byte chunk of an in-memory source.
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } }, .any, .any }, // NOTE(review): presumably signed elements held in qwords, sized in xword multiples — confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first an address scratch, then the loop offset.
+                .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } }, // tmp1: first constant (from tmp4), loaded once.
+                .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } }, // tmp2: second constant (from tmp5), loaded once.
+                .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } }, // tmp3: per-iteration result.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp4: constant in memory; by its name an unsigned max derived from dst0 — TODO confirm.
+                .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } }, // tmp5: constant in memory; by its name a signed min derived from dst0 — TODO confirm.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // The loop counter add below modifies flags.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp4), ._, ._ }, // tmp0 = address of the tmp4 constant.
+                .{ ._, .v_dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ }, // Load it into tmp1 once.
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp5), ._, ._ }, // tmp0 = address of the tmp5 constant.
+                .{ ._, .v_dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ }, // Load it into tmp2 once.
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = loop offset; presumably the negated byte size (TODO confirm).
+                .{ .@"0:", .vp_, .@"and", .tmp3x, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._ }, // Label 0: tmp3 = tmp1 & current source chunk.
+                .{ ._, .vp_q, .add, .tmp3x, .tmp3x, .tmp2x, ._ }, // Add tmp2 to each qword...
+                .{ ._, .vp_, .xor, .tmp3x, .tmp3x, .tmp2x, ._ }, // ...then xor with it. NOTE(review): looks like the (x + smin) ^ smin sign-extension idiom — confirm.
+                .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp3x, ._, ._ }, // Store the chunk to the result.
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // Advance by one 16-byte chunk.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add did not carry.
+            } },
+        }, .{
+            .required_features = .{ .avx, null, null, null }, // AVX loop entry: ANDs every 16-byte chunk of an in-memory source with a constant.
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } }, .any, .any }, // NOTE(review): presumably unsigned elements held in qwords, sized in xword multiples — confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first an address scratch, then the loop offset.
+                .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, // tmp1: the constant, loaded once.
+                .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, // tmp2: per-iteration result.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: constant in memory; by its name an unsigned max derived from dst0 — TODO confirm.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // The loop counter add below modifies flags.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of the tmp3 constant.
+                .{ ._, .v_dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ }, // Load the constant once, outside the loop.
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = loop offset; presumably the negated byte size (TODO confirm).
+                .{ .@"0:", .vp_, .@"and", .tmp2x, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._ }, // Label 0: tmp2 = constant & current source chunk.
+                .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ }, // Store the chunk to the result.
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // Advance by one 16-byte chunk.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add did not carry.
+            } },
+        }, .{
+            .required_features = .{ .sse2, null, null, null }, // SSE2 loop entry: AND / add / xor per 16-byte chunk using two-operand forms, so the first constant is copied each iteration.
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } }, .any, .any }, // NOTE(review): presumably signed elements held in qwords, sized in xword multiples — confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first an address scratch, then the loop offset.
+                .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } }, // tmp1: first constant (from tmp4), loaded once.
+                .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } }, // tmp2: second constant (from tmp5), loaded once.
+                .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } }, // tmp3: per-iteration working copy.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp4: constant in memory; by its name an unsigned max derived from dst0 — TODO confirm.
+                .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } }, // tmp5: constant in memory; by its name a signed min derived from dst0 — TODO confirm.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // The loop counter add below modifies flags.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp4), ._, ._ }, // tmp0 = address of the tmp4 constant.
+                .{ ._, ._dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ }, // Load it into tmp1 once.
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp5), ._, ._ }, // tmp0 = address of the tmp5 constant.
+                .{ ._, ._dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ }, // Load it into tmp2 once.
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = loop offset; presumably the negated byte size (TODO confirm).
+                .{ .@"0:", ._dqa, .mov, .tmp3x, .tmp1x, ._, ._ }, // Label 0: copy tmp1 because the AND below overwrites its destination.
+                .{ ._, .p_, .@"and", .tmp3x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, // tmp3 &= current source chunk.
+                .{ ._, .p_q, .add, .tmp3x, .tmp2x, ._, ._ }, // Add tmp2 to each qword...
+                .{ ._, .p_, .xor, .tmp3x, .tmp2x, ._, ._ }, // ...then xor with it. NOTE(review): looks like the (x + smin) ^ smin sign-extension idiom — confirm.
+                .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp3x, ._, ._ }, // Store the chunk to the result.
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // Advance by one 16-byte chunk.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add did not carry.
+            } },
+        }, .{
+            .required_features = .{ .sse2, null, null, null }, // SSE2 loop entry: ANDs every 16-byte chunk with a constant; two-operand form, so the constant is copied first.
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } }, .any, .any }, // NOTE(review): presumably unsigned elements held in qwords, sized in xword multiples — confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first an address scratch, then the loop offset.
+                .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, // tmp1: the constant, loaded once.
+                .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, // tmp2: per-iteration working copy.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: constant in memory; by its name an unsigned max derived from dst0 — TODO confirm.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // The loop counter add below modifies flags.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of the tmp3 constant.
+                .{ ._, ._dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ }, // Load the constant once, outside the loop.
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = loop offset; presumably the negated byte size (TODO confirm).
+                .{ .@"0:", ._dqa, .mov, .tmp2x, .tmp1x, ._, ._ }, // Label 0: copy the constant because the AND below overwrites its destination.
+                .{ ._, .p_, .@"and", .tmp2x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, // tmp2 &= current source chunk.
+                .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ }, // Store the chunk to the result.
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // Advance by one 16-byte chunk.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add did not carry.
+            } },
+        }, .{
+            .required_features = .{ .sse, null, null, null }, // SSE-only loop entry: same mask loop as the SSE2 form but built from the `_ps` move and AND.
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } }, .any, .any }, // NOTE(review): presumably unsigned elements held in qwords, sized in xword multiples — confirm.
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first an address scratch, then the loop offset.
+                .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, // tmp1: the constant, loaded once.
+                .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, // tmp2: per-iteration working copy.
+                .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: constant in memory; by its name an unsigned max derived from dst0 — TODO confirm.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // The loop counter add below modifies flags.
+            .each = .{ .once = &.{
+                .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of the tmp3 constant.
+                .{ ._, ._ps, .mova, .tmp1x, .lea(.tmp0x), ._, ._ }, // Load the constant once, outside the loop.
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = loop offset; presumably the negated byte size (TODO confirm).
+                .{ .@"0:", ._ps, .mova, .tmp2x, .tmp1x, ._, ._ }, // Label 0: copy the constant because the AND below overwrites its destination.
+                .{ ._, ._ps, .@"and", .tmp2x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, // tmp2 &= current source chunk (bitwise, so the float-typed op is fine for integers).
+                .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ }, // Store the chunk to the result.
+                .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // Advance by one 16-byte chunk.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add did not carry.
+            } },
+        }, .{
+            .src_constraints = .{ .{ .multiple_scalar_exact_int = .{ .of = .qword, .is = 64 } }, .any, .any }, // No-op entry; NOTE(review): presumably matches elements that are exactly 64 bits wide — confirm against the constraint definition.
+            .patterns = &.{
+                .{ .src = .{ .to_mut_mem, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused }, // The result reuses src0 directly.
+            .each = .{ .once = &.{} }, // No instructions are emitted.
+        }, .{
+            .required_features = .{ .@"64bit", null, null, null }, // 64-bit entry: walks the elements one qword at a time through a general-purpose register, shifting left then arithmetic-right.
+            .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset.
+                .{ .type = .i64, .kind = .{ .rc = .general_purpose } }, // tmp1: element being processed.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // The shifts and the counter add modify flags.
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = loop offset; presumably the negated byte size so it counts up toward zero (TODO confirm).
+                .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ }, // Label 0: load the current element.
+                .{ ._, ._l, .sa, .tmp1q, .uia(64, .dst0, .sub_bit_size), ._, ._ }, // Shift left; count is presumably 64 minus the element bit size (TODO confirm).
+                .{ ._, ._r, .sa, .tmp1q, .uia(64, .dst0, .sub_bit_size), ._, ._ }, // Arithmetic shift back by the same count, replicating the new top bit (sign extension).
+                .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp1q, ._, ._ }, // Store the element to the result.
+                .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // Advance by one 8-byte element.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add did not carry.
+            } },
+        }, .{
+            .required_features = .{ .@"64bit", .bmi2, null, null }, // 64-bit BMI2 entry: uses bzhi to clear the high bits of each qword element in a single instruction.
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset.
+                .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: bit index operand for bzhi.
+                .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp2: per-iteration result.
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true }, // bzhi and the counter add modify flags.
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = loop offset; presumably the negated byte size (TODO confirm).
+                .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ }, // tmp1 = immediate derived from dst0; presumably the element bit size (TODO confirm). The 32-bit write zero-extends, so the tmp1q read below is well defined.
+                .{ .@"0:", ._, .bzhi, .tmp2q, .memia(.src0q, .tmp0, .add_unaligned_size), .tmp1q, ._ }, // Label 0: tmp2 = source element with bits at index tmp1 and above cleared.
+                .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp2q, ._, ._ }, // Store the element to the result.
+                .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // Advance by one 8-byte element.
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the add did not carry.
+            } },
+        }, .{
+            .required_features = .{ .@"64bit", null, null, null },
+            .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .mem, .unused },
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                .{ .@"0:", ._, .mov, .tmp1q, .ua(.dst0, .add_umax), ._, ._ },
+                .{ ._, ._, .@"and", .tmp1q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp1q, ._, ._ },
+                .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+            } },
+        }, .{
+            .required_features = .{ .sse, null, null, null },
+            .src_constraints = .{ .{ .scalar_exact_int = .{ .of = .xword, .is = 128 } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_sse, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .each = .{ .once = &.{} },
+        }, .{
+            .required_features = .{ .avx, null, null, null },
+            .src_constraints = .{ .{ .scalar_exact_int = .{ .of = .yword, .is = 128 } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+                .{ .src = .{ .to_mut_sse, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .each = .{ .once = &.{} },
+        }, .{
+            .src_constraints = .{ .{ .scalar_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .to_mut_mem, .none, .none } },
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .each = .{ .once = &.{} },
+        }, .{
+            .required_features = .{ .@"64bit", null, null, null },
+            .src_constraints = .{ .{ .scalar_remainder_signed_int = .{ .of = .xword, .is = .qword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+                .{ .@"0:", ._, .mov, .tmp1q, .memi(.src0q, .tmp0), ._, ._ },
+                .{ ._, ._l, .sa, .tmp1q, .uia(64, .src0, .sub_bit_size_rem_64), ._, ._ },
+                .{ ._, ._r, .sa, .tmp1q, .uia(64, .src0, .sub_bit_size_rem_64), ._, ._ },
+                .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp1q, ._, ._ },
+                .{ ._, ._r, .sa, .tmp1q, .ui(63), ._, ._ },
+                .{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp1q, ._, ._ },
+                .{ ._, ._, .sub, .tmp0d, .sa(.src0, .add_elem_size), ._, ._ },
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+            } },
+        }, .{
+            .required_features = .{ .@"64bit", null, null, null },
+            .src_constraints = .{ .{ .scalar_remainder_unsigned_int = .{ .of = .xword, .is = .qword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ },
+                .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
+                .{ .@"0:", ._, .@"and", .memi(.dst0q, .tmp0), .tmp1q, ._, ._ },
+                .{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .si(0), ._, ._ },
+                .{ ._, ._, .sub, .tmp0d, .sa(.src0, .add_elem_size), ._, ._ },
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+            } },
+        }, .{
+            .required_features = .{ .@"64bit", null, null, null },
+            .src_constraints = .{ .{ .scalar_remainder_signed_int = .{ .of = .xword, .is = .xword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_unaligned_size), ._, ._ },
+                .{ .@"0:", ._, .mov, .tmp1q, .memi(.src0q, .tmp0), ._, ._ },
+                .{ ._, ._l, .sa, .tmp1q, .uia(64, .src0, .sub_bit_size_rem_64), ._, ._ },
+                .{ ._, ._r, .sa, .tmp1q, .uia(64, .src0, .sub_bit_size_rem_64), ._, ._ },
+                .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp1q, ._, ._ },
+                .{ ._, ._, .sub, .tmp0d, .sa(.src0, .add_elem_size), ._, ._ },
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+            } },
+        }, .{
+            .required_features = .{ .@"64bit", null, null, null },
+            .src_constraints = .{ .{ .scalar_remainder_unsigned_int = .{ .of = .xword, .is = .xword } }, .any, .any },
+            .patterns = &.{
+                .{ .src = .{ .mut_mem, .none, .none } },
+            },
+            .extra_temps = .{
+                .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+                .unused,
+            },
+            .dst_temps = .{ .{ .ref = .src0 }, .unused },
+            .clobbers = .{ .eflags = true },
+            .each = .{ .once = &.{
+                .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_unaligned_size), ._, ._ },
+                .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ },
+                .{ .@"0:", ._, .@"and", .memi(.dst0q, .tmp0), .tmp1q, ._, ._ },
+                .{ ._, ._, .sub, .tmp0d, .sa(.src0, .add_elem_size), ._, ._ },
+                .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+            } },
+        } });
+        for (ops) |op| for (res) |r| {
+            if (op.index == r.index) break;
+        } else try op.die(cg);
+        temp.* = res[0];
+    }
+
+    /// Supports any `op` using `cg.intInfo(lhs.typeOf(cg)).?.signedness` as the signedness.
+    /// Returns `error.SelectFailed` when `cg.intInfo(lhs.typeOf(cg)) == null`.
+    fn cmpInts(lhs: *Temp, op: std.math.CompareOperator, rhs: *Temp, cg: *CodeGen) Select.Error!Temp {
+        var ops: [2]Temp = .{ lhs.*, rhs.* };
+        var res: [1]Temp = undefined;
+        switch (op) {
+            .lt, .lte, .gte, .gt => {
+                const commute = switch (op) {
+                    .lt, .gte => false,
+                    .lte, .gt => true,
+                    else => unreachable,
+                };
+                if (commute) std.mem.swap(Temp, &ops[0], &ops[1]);
+                try cg.select(&res, &.{.bool}, &ops, comptime &.{ .{
+                    .src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .imm8, .mem, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .imm8, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                    },
+                    .dst_temps = .{ .{ .cc = .g }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm8, .none } },
+                        .{ .src = .{ .to_gpr, .imm8, .none } },
+                        .{ .src = .{ .to_gpr, .mem, .none } },
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .l }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .imm8, .mem, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .imm8, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                    },
+                    .dst_temps = .{ .{ .cc = .a }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm8, .none } },
+                        .{ .src = .{ .to_gpr, .imm8, .none } },
+                        .{ .src = .{ .to_gpr, .mem, .none } },
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .b }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .imm16, .mem, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .imm16, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                    },
+                    .dst_temps = .{ .{ .cc = .g }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm16, .none } },
+                        .{ .src = .{ .to_gpr, .imm16, .none } },
+                        .{ .src = .{ .to_gpr, .mem, .none } },
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .l }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .imm16, .mem, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .imm16, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                    },
+                    .dst_temps = .{ .{ .cc = .a }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm16, .none } },
+                        .{ .src = .{ .to_gpr, .imm16, .none } },
+                        .{ .src = .{ .to_gpr, .mem, .none } },
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .b }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .dword }, .{ .signed_int = .dword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .imm32, .mem, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .imm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                    },
+                    .dst_temps = .{ .{ .cc = .g }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .dword }, .{ .signed_int = .dword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm32, .none } },
+                        .{ .src = .{ .to_gpr, .imm32, .none } },
+                        .{ .src = .{ .to_gpr, .mem, .none } },
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .l }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .imm32, .mem, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .imm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                    },
+                    .dst_temps = .{ .{ .cc = .a }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm32, .none } },
+                        .{ .src = .{ .to_gpr, .imm32, .none } },
+                        .{ .src = .{ .to_gpr, .mem, .none } },
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .b }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .signed_int = .qword }, .{ .signed_int = .qword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .simm32, .mem, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .simm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                    },
+                    .dst_temps = .{ .{ .cc = .g }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .signed_int = .qword }, .{ .signed_int = .qword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .simm32, .none } },
+                        .{ .src = .{ .to_gpr, .simm32, .none } },
+                        .{ .src = .{ .to_gpr, .mem, .none } },
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .l }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .qword }, .{ .unsigned_int = .qword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .simm32, .mem, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .simm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                    },
+                    .dst_temps = .{ .{ .cc = .a }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .qword }, .{ .unsigned_int = .qword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .simm32, .none } },
+                        .{ .src = .{ .to_gpr, .simm32, .none } },
+                        .{ .src = .{ .to_gpr, .mem, .none } },
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .b }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{
+                        .{ .remainder_signed_int = .{ .of = .qword, .is = .qword } },
+                        .{ .remainder_signed_int = .{ .of = .qword, .is = .qword } },
+                        .any,
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i64, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{ .{ .cc = .l }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sia(1, .src0, .sub_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsiad(.src0q, .@"8", .tmp0, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memsiad(.src1q, .@"8", .tmp0, .add_size, -8), ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1q, .memad(.src0q, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memad(.src1q, .add_size, -8), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{
+                        .{ .remainder_unsigned_int = .{ .of = .qword, .is = .qword } },
+                        .{ .remainder_unsigned_int = .{ .of = .qword, .is = .qword } },
+                        .any,
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{ .{ .cc = .b }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memsia(.src1q, .@"8", .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{
+                        .{ .remainder_signed_int = .{ .of = .dword, .is = .dword } },
+                        .{ .remainder_signed_int = .{ .of = .dword, .is = .dword } },
+                        .any,
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{ .{ .cc = .l }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    // Borrow-chain compare over dword limbs. The scratch temp is declared as
+                    // `i32` and the limb index is scaled by 4, so every load and `sbb` must be
+                    // 32 bits wide; qword operands would overlap adjacent limbs and read past
+                    // the end of the operands.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sia(1, .src0, .sub_size_div_4), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsiad(.src0d, .@"4", .tmp0, .add_size, -4), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1d, .memsiad(.src1d, .@"4", .tmp0, .add_size, -4), ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                        // The last limb is handled outside the loop.
+                        .{ ._, ._, .mov, .tmp1d, .memad(.src0d, .add_size, -4), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1d, .memad(.src1d, .add_size, -4), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{
+                        .{ .remainder_unsigned_int = .{ .of = .dword, .is = .dword } },
+                        .{ .remainder_unsigned_int = .{ .of = .dword, .is = .dword } },
+                        .any,
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{ .{ .cc = .b }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    // Borrow-chain compare over dword limbs. The scratch temp is declared as
+                    // `u32` and the limb index is scaled by 4, so every load and `sbb` must be
+                    // 32 bits wide; qword operands would overlap adjacent limbs and read past
+                    // the end of the operands.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size_div_4), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1d, .memsia(.src1d, .@"4", .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                } });
+                if (commute) std.mem.swap(Temp, &ops[0], &ops[1]);
+            },
+            .eq, .neq => {
+                try cg.select(&res, &.{.bool}, &ops, comptime &.{ .{
+                    .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm8, .none } },
+                        .{ .src = .{ .imm8, .mem, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .imm8, .none } },
+                        .{ .src = .{ .imm8, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .mem, .none } },
+                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .e }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .int = .word }, .{ .int = .word }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm16, .none } },
+                        .{ .src = .{ .imm16, .mem, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .imm16, .none } },
+                        .{ .src = .{ .imm16, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .mem, .none } },
+                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .e }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .imm32, .none } },
+                        .{ .src = .{ .imm32, .mem, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .imm32, .none } },
+                        .{ .src = .{ .imm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .mem, .none } },
+                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .e }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .simm32, .none } },
+                        .{ .src = .{ .simm32, .mem, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .simm32, .none } },
+                        .{ .src = .{ .simm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .mem, .none } },
+                        .{ .src = .{ .mem, .to_gpr, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .e }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse, .mmx, null, null },
+                    .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword }, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_mm, .mem, .none } },
+                        .{ .src = .{ .mem, .to_mut_mm, .none }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_mm, .to_mm, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .rc = .mmx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{ .{ .cc = .z }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ },
+                        .{ ._, .p_, .xor, .src0q, .src1q, ._, ._ },
+                        .{ ._, .p_b, .cmpeq, .tmp1q, .src0q, ._, ._ },
+                        .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ },
                         .{ ._, ._, .cmp, .tmp0b, .si(-1), ._, ._ },
                     } },
                 }, .{
@@ -103648,9 +106046,14 @@ const Select = struct {
         scalar_int: OfIsSizes,
         scalar_signed_int: OfIsSizes,
         scalar_unsigned_int: OfIsSizes,
+        scalar_signed_or_exclusive_int: OfIsSizes,
+        scalar_exact_int: struct { of: Memory.Size, is: u16 },
+        scalar_exact_signed_int: struct { of: Memory.Size, is: u16 },
+        scalar_exact_unsigned_int: struct { of: Memory.Size, is: u16 },
         multiple_scalar_int: OfIsSizes,
         multiple_scalar_signed_int: OfIsSizes,
         multiple_scalar_unsigned_int: OfIsSizes,
+        multiple_scalar_signed_or_exclusive_int: OfIsSizes,
         multiple_scalar_exact_int: struct { of: Memory.Size, is: u16 },
         multiple_scalar_exact_signed_int: struct { of: Memory.Size, is: u16 },
         multiple_scalar_exact_unsigned_int: struct { of: Memory.Size, is: u16 },
@@ -103739,6 +106142,19 @@ const Select = struct {
                 .scalar_unsigned_int => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) >= cg.unalignedSize(ty) and
                     if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .unsigned and
                         of_is.is.bitSize(cg.target) >= int_info.bits else false,
+                // Matches when `of` is at least the unaligned size of `ty` and the scalar type of
+                // `ty` is an integer that fits in `is`. As in the sibling `scalar_*` prongs, the
+                // integer info is taken from `ty.scalarType(zcu)`, not from `ty` itself.
+                .scalar_signed_or_exclusive_int => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) >= cg.unalignedSize(ty) and
+                    if (cg.intInfo(ty.scalarType(zcu))) |int_info| switch (int_info.signedness) {
+                        // signed: the integer may be exactly as wide as `is`
+                        .signed => of_is.is.bitSize(cg.target) >= int_info.bits,
+                        // unsigned: the integer must be strictly narrower than `is` (exclusive bound)
+                        .unsigned => of_is.is.bitSize(cg.target) > int_info.bits,
+                    } else false,
+                .scalar_exact_int => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) >= cg.unalignedSize(ty) and
+                    if (cg.intInfo(ty.scalarType(zcu))) |int_info| of_is.is == int_info.bits else false,
+                .scalar_exact_signed_int => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) >= cg.unalignedSize(ty) and
+                    if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .signed and
+                        of_is.is == int_info.bits else false,
+                .scalar_exact_unsigned_int => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) >= cg.unalignedSize(ty) and
+                    if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .unsigned and
+                        of_is.is == int_info.bits else false,
                 .multiple_scalar_int => |of_is| ty.abiSize(zcu) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and
                     if (cg.intInfo(ty.scalarType(zcu))) |int_info| of_is.is.bitSize(cg.target) >= int_info.bits else false,
                 .multiple_scalar_signed_int => |of_is| ty.abiSize(zcu) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and
@@ -103747,6 +106163,11 @@ const Select = struct {
                 .multiple_scalar_unsigned_int => |of_is| ty.abiSize(zcu) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and
                     if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .unsigned and
                         of_is.is.bitSize(cg.target) >= int_info.bits else false,
+                .multiple_scalar_signed_or_exclusive_int => |of_is| ty.abiSize(zcu) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and
+                    if (cg.intInfo(ty.scalarType(zcu))) |int_info| switch (int_info.signedness) {
+                        .signed => of_is.is.bitSize(cg.target) >= int_info.bits,
+                        .unsigned => of_is.is.bitSize(cg.target) > int_info.bits,
+                    } else false,
                 .multiple_scalar_exact_int => |of_is| ty.abiSize(zcu) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and
                     if (cg.intInfo(ty.scalarType(zcu))) |int_info| of_is.is == int_info.bits else false,
                 .multiple_scalar_exact_signed_int => |of_is| ty.abiSize(zcu) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and
@@ -104410,9 +106831,9 @@ const Select = struct {
                 src0_size,
                 delta_size,
                 delta_elem_size,
-                size_add_elem_size,
-                size_sub_elem_size,
                 unaligned_size,
+                unaligned_size_add_elem_size,
+                unaligned_size_sub_elem_size,
                 bit_size,
                 src0_bit_size,
                 @"8_size_sub_bit_size",
@@ -104448,10 +106869,10 @@ const Select = struct {
             const add_delta_size_div_8: Adjust = .{ .sign = .pos, .lhs = .delta_size, .op = .div, .rhs = .@"8" };
             const add_delta_elem_size: Adjust = .{ .sign = .pos, .lhs = .delta_elem_size, .op = .mul, .rhs = .@"1" };
             const add_delta_elem_size_div_8: Adjust = .{ .sign = .pos, .lhs = .delta_elem_size, .op = .div, .rhs = .@"8" };
-            const add_size_add_elem_size: Adjust = .{ .sign = .pos, .lhs = .size_add_elem_size, .op = .mul, .rhs = .@"1" };
-            const add_size_sub_elem_size: Adjust = .{ .sign = .pos, .lhs = .size_sub_elem_size, .op = .mul, .rhs = .@"1" };
             const add_unaligned_size: Adjust = .{ .sign = .pos, .lhs = .unaligned_size, .op = .mul, .rhs = .@"1" };
             const sub_unaligned_size: Adjust = .{ .sign = .neg, .lhs = .unaligned_size, .op = .mul, .rhs = .@"1" };
+            const add_unaligned_size_add_elem_size: Adjust = .{ .sign = .pos, .lhs = .unaligned_size_add_elem_size, .op = .mul, .rhs = .@"1" };
+            const add_unaligned_size_sub_elem_size: Adjust = .{ .sign = .pos, .lhs = .unaligned_size_sub_elem_size, .op = .mul, .rhs = .@"1" };
             const add_2_bit_size: Adjust = .{ .sign = .pos, .lhs = .bit_size, .op = .mul, .rhs = .@"2" };
             const add_bit_size: Adjust = .{ .sign = .pos, .lhs = .bit_size, .op = .mul, .rhs = .@"1" };
             const add_bit_size_rem_64: Adjust = .{ .sign = .pos, .lhs = .bit_size, .op = .rem_8_mul, .rhs = .@"8" };
@@ -105161,15 +107582,15 @@ const Select = struct {
                     @as(SignedImm, @intCast(op.flags.index.ref.typeOf(s).abiSize(s.cg.pt.zcu)))),
                 .delta_elem_size => @intCast(@as(SignedImm, @intCast(op.flags.base.ref.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu))) -
                     @as(SignedImm, @intCast(op.flags.index.ref.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)))),
-                .size_add_elem_size => {
+                .unaligned_size => @intCast(s.cg.unalignedSize(op.flags.base.ref.typeOf(s))),
+                .unaligned_size_add_elem_size => {
                     const ty = op.flags.base.ref.typeOf(s);
-                    break :lhs @intCast(ty.abiSize(s.cg.pt.zcu) + ty.elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu));
+                    break :lhs @intCast(s.cg.unalignedSize(ty) + ty.elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu));
                 },
-                .size_sub_elem_size => {
+                .unaligned_size_sub_elem_size => {
                     const ty = op.flags.base.ref.typeOf(s);
-                    break :lhs @intCast(ty.abiSize(s.cg.pt.zcu) - ty.elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu));
+                    break :lhs @intCast(s.cg.unalignedSize(ty) - ty.elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu));
                 },
-                .unaligned_size => @intCast(s.cg.unalignedSize(op.flags.base.ref.typeOf(s))),
                 .bit_size => @intCast(op.flags.base.ref.typeOf(s).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu)),
                 .src0_bit_size => @intCast(Select.Operand.Ref.src0.typeOf(s).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu)),
                 .@"8_size_sub_bit_size" => {
test/behavior/x86_64/math.zig
@@ -18000,76 +18000,1252 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
             try testArgs(f128, nan(f128), nan(f128));
         }
         fn testIntVectors() !void {
-            try testArgs(@Vector(1, i4), .{
+            try testArgs(@Vector(1, i1), .{
+                0x0,
+            }, .{
+                -0x1,
+            });
+            try testArgs(@Vector(2, i1), .{
+                0x0, 0x0,
+            }, .{
+                -0x1, -0x1,
+            });
+            try testArgs(@Vector(4, i1), .{
+                0x0, 0x0, 0x0, 0x0,
+            }, .{
+                -0x1, -0x1, -0x1, -0x1,
+            });
+            try testArgs(@Vector(8, i1), .{
+                0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+            }, .{
+                -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1,
+            });
+            try testArgs(@Vector(16, i1), .{
+                0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+            }, .{
+                -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1,
+            });
+            try testArgs(@Vector(32, i1), .{
+                0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+                0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+            }, .{
+                -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1,
+                -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1,
+            });
+            try testArgs(@Vector(64, i1), .{
+                0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+                0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+                0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+                0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+            }, .{
+                -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1,
+                -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1,
+                -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1,
+                -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1,
+            });
+            try testArgs(@Vector(128, i1), .{
+                0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+                0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+                0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+                0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+                0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+                0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+                0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+                0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+            }, .{
+                -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1,
+                -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1,
+                -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1,
+                -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1,
+                -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1,
+                -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1,
+                -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1,
+                -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1,
+            });
+
+            try testArgs(@Vector(1, u1), .{
+                0x0,
+            }, .{
                 0x1,
+            });
+            try testArgs(@Vector(2, u1), .{
+                0x0, 0x1,
             }, .{
-                0x3,
+                0x1, 0x1,
             });
-            try testArgs(@Vector(2, i4), .{
-                -0x1, 0x7,
+            try testArgs(@Vector(4, u1), .{
+                0x0, 0x0, 0x1, 0x0,
             }, .{
-                -0x7, 0x6,
+                0x1, 0x1, 0x1, 0x1,
             });
-            try testArgs(@Vector(4, i4), .{
-                -0x1, 0x2, -0x3, -0x6,
+            try testArgs(@Vector(8, u1), .{
+                0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1,
             }, .{
-                -0x2, -0x6, -0x4, 0x1,
+                0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
             });
-            try testArgs(@Vector(8, i4), .{
-                -0x4, 0x6, -0x4, -0x1, -0x1, 0x6, 0x5, 0x2,
+            try testArgs(@Vector(16, u1), .{
+                0x1, 0x0, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0, 0x1, 0x0, 0x0, 0x1, 0x1, 0x1, 0x0, 0x1,
+            }, .{
+                0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+            });
+            try testArgs(@Vector(32, u1), .{
+                0x0, 0x1, 0x1, 0x1, 0x0, 0x0, 0x0, 0x1, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0,
+                0x1, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x0, 0x1, 0x0, 0x0,
+            }, .{
+                0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+                0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+            });
+            try testArgs(@Vector(64, u1), .{
+                0x1, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x0, 0x1, 0x1,
+                0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1,
+                0x1, 0x0, 0x0, 0x1, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1, 0x0, 0x1, 0x1, 0x0, 0x0, 0x1,
+                0x1, 0x0, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0, 0x0, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1, 0x0,
+            }, .{
+                0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+                0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+                0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+                0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+            });
+            try testArgs(@Vector(128, u1), .{
+                0x0, 0x0, 0x1, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1,
+                0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x1, 0x1, 0x1, 0x0, 0x0, 0x1,
+                0x1, 0x1, 0x0, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x1, 0x0, 0x1, 0x1,
+                0x1, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x1, 0x1, 0x1, 0x0, 0x1,
+                0x1, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x1,
+                0x0, 0x1, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x1, 0x1, 0x0, 0x1, 0x1,
+                0x1, 0x0, 0x0, 0x1, 0x0, 0x1, 0x1, 0x1, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
+                0x0, 0x0, 0x1, 0x1, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x0, 0x1, 0x1, 0x1, 0x0, 0x1,
+            }, .{
+                0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+                0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+                0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+                0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+                0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+                0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+                0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+                0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
+            });
+
+            // workaround https://github.com/ziglang/zig/issues/22914
+            // TODO: try testArgs(@Vector(1, i2), .{
+            //     0x1,
+            // }, .{
+            //     0x1,
+            // });
+            // try testArgs(@Vector(2, i2), .{
+            //     0x0, -0x2,
+            // }, .{
+            //     -0x2, -0x2,
+            // });
+            // try testArgs(@Vector(4, i2), .{
+            //     -0x2, -0x1, 0x0, -0x2,
+            // }, .{
+            //     -0x2, 0x1, -0x1, -0x2,
+            // });
+            // try testArgs(@Vector(8, i2), .{
+            //     -0x1, 0x1, 0x1, -0x1, -0x2, -0x2, 0x0, -0x2,
+            // }, .{
+            //     -0x1, -0x1, -0x2, -0x1, -0x2, 0x1, -0x1, 0x1,
+            // });
+            // try testArgs(@Vector(16, i2), .{
+            //     0x0, -0x2, -0x1, -0x2, 0x1, 0x0, -0x1, 0x0, 0x1, -0x2, 0x1, -0x1, -0x2, -0x2, 0x1, 0x0,
+            // }, .{
+            //     -0x2, -0x2, 0x1, -0x2, -0x1, -0x2, -0x1, -0x2, -0x2, -0x2, -0x1, -0x2, 0x1, -0x2, -0x2, -0x2,
+            // });
+            // try testArgs(@Vector(32, i2), .{
+            //     -0x2, 0x1, -0x1, 0x1, 0x0, -0x2, 0x1, -0x2, -0x2, 0x0,  -0x1, 0x0,  -0x2, -0x2, 0x0, 0x1,
+            //     -0x1, 0x1, -0x1, 0x1, 0x1, 0x1,  0x1, -0x2, -0x1, -0x1, 0x1,  -0x2, 0x0,  -0x1, 0x0, -0x2,
+            // }, .{
+            //     0x1,  -0x1, 0x1, -0x1, 0x1,  0x1,  0x1,  -0x2, 0x1,  -0x2, -0x1, -0x2, 0x1,  0x1, 0x1,  -0x1,
+            //     -0x2, 0x1,  0x1, -0x1, -0x2, -0x2, -0x1, -0x2, -0x1, -0x2, 0x1,  -0x2, -0x1, 0x1, -0x2, -0x2,
+            // });
+            // try testArgs(@Vector(64, i2), .{
+            //     0x1,  -0x2, -0x1, 0x0,  0x1,  -0x2, -0x1, -0x2, -0x2, -0x1, -0x2, -0x1, 0x1, 0x1,  0x0,  0x1,
+            //     -0x1, -0x1, -0x1, 0x1,  0x1,  -0x1, 0x0,  0x1,  -0x1, 0x0,  0x0,  0x1,  0x1, 0x0,  -0x2, -0x2,
+            //     0x1,  0x0,  -0x2, -0x2, 0x1,  -0x2, -0x2, 0x1,  0x1,  -0x2, 0x1,  0x0,  0x0, -0x1, 0x0,  0x1,
+            //     -0x2, 0x0,  0x0,  -0x1, -0x1, 0x1,  -0x2, 0x0,  -0x2, 0x0,  -0x2, 0x1,  0x0, -0x1, -0x1, 0x1,
+            // }, .{
+            //     -0x2, -0x2, 0x1,  -0x1, -0x2, -0x2, -0x1, -0x2, 0x1,  0x1,  0x1,  -0x1, 0x1,  0x1,  0x1,  -0x1,
+            //     -0x2, 0x1,  0x1,  -0x2, -0x2, 0x1,  0x1,  -0x1, -0x2, -0x2, 0x1,  -0x1, -0x2, 0x1,  -0x2, 0x1,
+            //     0x1,  -0x2, -0x2, -0x2, -0x2, 0x1,  0x1,  0x1,  -0x2, 0x1,  -0x1, 0x1,  -0x1, 0x1,  0x1,  -0x1,
+            //     -0x2, 0x1,  -0x1, 0x1,  -0x1, -0x1, 0x1,  0x1,  -0x2, 0x1,  0x1,  -0x2, -0x2, -0x1, -0x2, -0x2,
+            // });
+            // try testArgs(@Vector(128, i2), .{
+            //     -0x1, -0x2, 0x0,  -0x2, -0x2, 0x1,  -0x1, 0x0,  -0x1, -0x2, 0x0,  -0x2, 0x0,  0x1,  0x0,  -0x1,
+            //     0x0,  -0x2, 0x1,  0x0,  0x1,  0x0,  -0x2, 0x1,  0x1,  0x1,  -0x1, 0x1,  0x0,  -0x1, 0x1,  -0x1,
+            //     0x1,  -0x2, 0x1,  -0x2, 0x1,  -0x2, 0x1,  -0x2, -0x2, -0x2, 0x0,  0x0,  0x1,  0x1,  -0x2, -0x1,
+            //     0x1,  0x0,  0x0,  0x1,  -0x2, -0x1, 0x0,  -0x1, 0x1,  -0x2, 0x1,  0x0,  0x1,  0x0,  0x0,  -0x2,
+            //     0x0,  0x0,  -0x1, 0x1,  -0x1, 0x0,  -0x1, -0x2, 0x1,  -0x2, -0x2, -0x1, -0x2, 0x0,  0x0,  0x0,
+            //     -0x1, -0x1, -0x1, -0x1, -0x2, 0x0,  -0x1, 0x1,  0x0,  0x0,  -0x2, 0x0,  0x0,  0x0,  0x0,  0x1,
+            //     0x1,  -0x2, 0x0,  0x0,  -0x1, -0x1, 0x1,  -0x1, -0x2, 0x0,  -0x1, -0x1, -0x2, -0x2, 0x0,  0x0,
+            //     -0x1, 0x0,  0x1,  0x0,  -0x1, -0x2, 0x1,  -0x2, -0x1, -0x1, 0x0,  0x0,  -0x1, 0x0,  0x0,  0x1,
+            // }, .{
+            //     -0x2, -0x2, 0x1,  0x1,  -0x2, -0x1, 0x1,  0x1,  0x1,  -0x2, -0x2, 0x1,  -0x2, -0x2, 0x1,  -0x1,
+            //     -0x1, -0x2, 0x1,  -0x1, 0x1,  0x1,  0x1,  -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, 0x1,  0x1,  -0x2,
+            //     -0x1, -0x2, -0x2, -0x2, -0x2, -0x2, -0x1, -0x2, 0x1,  0x1,  -0x1, -0x1, -0x1, -0x1, 0x1,  -0x2,
+            //     0x1,  -0x1, 0x1,  -0x1, 0x1,  0x1,  -0x1, -0x2, 0x1,  0x1,  -0x2, -0x2, -0x2, 0x1,  0x1,  -0x2,
+            //     -0x1, 0x1,  -0x2, -0x1, -0x1, 0x1,  -0x2, -0x2, 0x1,  0x1,  -0x2, -0x1, -0x2, -0x2, -0x2, -0x2,
+            //     0x1,  -0x1, 0x1,  -0x2, 0x1,  -0x1, -0x1, 0x1,  0x1,  -0x1, 0x1,  -0x1, -0x2, -0x2, -0x1, -0x2,
+            //     -0x2, 0x1,  -0x2, -0x2, -0x1, -0x1, -0x1, -0x2, 0x1,  -0x2, 0x1,  -0x2, 0x1,  -0x2, -0x2, 0x1,
+            //     0x1,  -0x1, 0x1,  0x1,  -0x1, -0x2, 0x1,  0x1,  -0x1, -0x2, -0x1, -0x1, 0x1,  -0x2, -0x2, -0x2,
+            // });
+            try testArgs(@Vector(1, i2), .{
+                -0x1,
+            }, .{
+                -0x2,
+            });
+            try testArgs(@Vector(2, i2), .{
+                -0x1, -0x1,
+            }, .{
+                0x1, -0x1,
+            });
+            try testArgs(@Vector(4, i2), .{
+                -0x1, 0x1, -0x1, -0x1,
+            }, .{
+                0x1, -0x1, -0x1, 0x1,
+            });
+            try testArgs(@Vector(8, i2), .{
+                0x1, 0x0, 0x0, 0x0, 0x0, 0x0, -0x1, 0x1,
+            }, .{
+                0x1, -0x1, -0x1, -0x2, -0x1, -0x2, -0x1, 0x1,
+            });
+            try testArgs(@Vector(16, i2), .{
+                0x1, 0x0, 0x0, -0x1, 0x1, 0x0, 0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, 0x1, 0x1, 0x0,
+            }, .{
+                0x1, 0x1, 0x1, 0x1, -0x2, -0x2, -0x1, -0x1, -0x1, -0x2, 0x1, 0x1, -0x1, -0x2, -0x1, -0x1,
+            });
+            try testArgs(@Vector(32, i2), .{
+                0x0,  0x1, -0x1, -0x1, -0x1, 0x0,  -0x1, 0x0,  -0x1, -0x1, 0x1,  -0x1, 0x0, -0x1, 0x0, -0x1,
+                -0x1, 0x1, 0x1,  0x1,  -0x1, -0x1, 0x1,  -0x1, -0x1, 0x1,  -0x1, -0x1, 0x1, 0x0,  0x0, 0x1,
+            }, .{
+                -0x2, -0x1, -0x2, 0x1,  -0x1, 0x1,  -0x2, -0x2, -0x1, -0x1, -0x1, -0x1, 0x1, 0x1,  0x1,  -0x2,
+                -0x1, -0x1, -0x2, -0x2, -0x1, -0x2, -0x1, 0x1,  -0x2, -0x1, -0x1, -0x2, 0x1, -0x2, -0x2, -0x1,
+            });
+            try testArgs(@Vector(64, i2), .{
+                0x1,  0x1,  -0x1, 0x0, 0x0,  0x1,  0x1,  -0x1, 0x0, 0x1, 0x1,  0x1,  0x1,  0x1,  0x0,  0x0,
+                -0x1, 0x0,  0x0,  0x0, 0x0,  -0x1, 0x0,  0x1,  0x1, 0x0, 0x1,  -0x1, -0x1, 0x0,  -0x1, 0x0,
+                -0x1, 0x1,  -0x1, 0x0, -0x1, 0x1,  0x0,  0x0,  0x0, 0x1, -0x1, 0x0,  -0x1, 0x1,  0x0,  0x0,
+                0x1,  -0x1, -0x1, 0x0, 0x1,  -0x1, -0x1, 0x0,  0x1, 0x1, -0x1, 0x0,  0x1,  -0x1, 0x0,  0x0,
+            }, .{
+                -0x1, 0x1,  -0x2, -0x1, -0x1, 0x1,  0x1,  -0x2, -0x2, 0x1, 0x1,  -0x1, -0x2, 0x1,  0x1,  -0x1,
+                -0x1, -0x1, 0x1,  -0x2, -0x2, -0x2, -0x1, -0x1, -0x2, 0x1, -0x2, -0x2, -0x2, 0x1,  -0x2, -0x2,
+                -0x2, -0x1, -0x1, 0x1,  -0x2, -0x1, -0x1, -0x1, -0x2, 0x1, -0x1, 0x1,  -0x2, -0x2, 0x1,  -0x1,
+                -0x1, 0x1,  -0x2, -0x2, -0x2, -0x2, -0x2, -0x2, -0x2, 0x1, 0x1,  -0x1, 0x1,  0x1,  -0x1, -0x2,
+            });
+            try testArgs(@Vector(128, i2), .{
+                0x1, -0x1, -0x1, -0x1, -0x1, -0x1, -0x1, 0x0, 0x1,  0x0,  -0x1, 0x0,  -0x1, 0x0,  0x0,  0x0,
+                0x0, 0x0,  0x1,  -0x1, 0x1,  -0x1, -0x1, 0x1, 0x0,  0x1,  0x1,  0x0,  0x1,  -0x1, 0x0,  -0x1,
+                0x0, 0x1,  -0x1, 0x1,  -0x1, -0x1, 0x0,  0x1, 0x1,  0x0,  0x0,  0x0,  0x1,  0x0,  -0x1, 0x1,
+                0x0, 0x0,  0x0,  0x0,  0x0,  0x1,  -0x1, 0x1, 0x1,  -0x1, 0x1,  -0x1, 0x0,  0x1,  0x0,  0x1,
+                0x0, -0x1, 0x0,  0x0,  0x1,  -0x1, 0x0,  0x0, -0x1, -0x1, 0x0,  0x0,  0x0,  0x0,  0x1,  0x1,
+                0x0, -0x1, 0x1,  -0x1, -0x1, 0x0,  0x0,  0x1, -0x1, -0x1, 0x1,  0x0,  -0x1, 0x1,  0x0,  -0x1,
+                0x1, 0x0,  0x1,  -0x1, 0x1,  0x0,  0x0,  0x1, 0x0,  0x0,  0x1,  -0x1, -0x1, 0x0,  0x1,  0x0,
+                0x0, 0x0,  0x1,  0x0,  0x0,  0x1,  -0x1, 0x0, -0x1, -0x1, 0x0,  0x0,  0x0,  0x1,  0x1,  -0x1,
+            }, .{
+                -0x2, -0x2, -0x1, -0x2, -0x1, -0x1, -0x2, -0x2, -0x2, 0x1,  -0x2, -0x2, -0x2, -0x2, -0x2, 0x1,
+                -0x1, 0x1,  -0x1, -0x1, -0x2, -0x2, -0x1, -0x2, 0x1,  -0x2, -0x1, -0x2, -0x2, -0x1, 0x1,  -0x1,
+                -0x2, -0x1, -0x1, -0x2, -0x1, -0x2, -0x1, 0x1,  -0x1, -0x1, 0x1,  -0x1, -0x2, 0x1,  -0x2, -0x1,
+                -0x2, 0x1,  -0x1, -0x2, -0x2, -0x2, -0x2, -0x1, 0x1,  0x1,  0x1,  0x1,  -0x1, -0x2, -0x1, -0x1,
+                -0x2, -0x2, 0x1,  0x1,  -0x2, -0x1, 0x1,  -0x2, -0x1, -0x2, -0x1, -0x2, -0x2, 0x1,  -0x1, 0x1,
+                -0x2, -0x2, 0x1,  -0x2, -0x1, -0x1, 0x1,  -0x1, -0x1, -0x1, -0x2, -0x2, -0x1, 0x1,  0x1,  -0x2,
+                -0x1, -0x2, 0x1,  0x1,  0x1,  0x1,  -0x2, 0x1,  -0x1, -0x1, -0x2, 0x1,  -0x1, -0x2, -0x1, -0x2,
+                -0x2, -0x2, -0x1, -0x1, -0x2, -0x1, 0x1,  -0x1, 0x1,  0x1,  0x1,  -0x2, -0x1, 0x1,  -0x2, 0x1,
+            });
+
+            try testArgs(@Vector(1, u2), .{
+                0x2,
+            }, .{
+                0x2,
+            });
+            try testArgs(@Vector(2, u2), .{
+                0x1, 0x0,
+            }, .{
+                0x2, 0x1,
+            });
+            try testArgs(@Vector(4, u2), .{
+                0x3, 0x3, 0x0, 0x2,
+            }, .{
+                0x3, 0x1, 0x1, 0x3,
+            });
+            try testArgs(@Vector(8, u2), .{
+                0x0, 0x3, 0x3, 0x2, 0x1, 0x2, 0x3, 0x1,
             }, .{
-                0x2, 0x4, -0x3, -0x6, 0x1, -0x5, -0x1, 0x2,
+                0x1, 0x1, 0x3, 0x1, 0x2, 0x2, 0x2, 0x2,
             });
+            try testArgs(@Vector(16, u2), .{
+                0x1, 0x1, 0x0, 0x1, 0x0, 0x2, 0x2, 0x1, 0x2, 0x1, 0x3, 0x1, 0x1, 0x3, 0x3, 0x1,
+            }, .{
+                0x1, 0x2, 0x2, 0x2, 0x1, 0x2, 0x3, 0x1, 0x3, 0x3, 0x2, 0x2, 0x2, 0x1, 0x3, 0x1,
+            });
+            try testArgs(@Vector(32, u2), .{
+                0x2, 0x2, 0x3, 0x1, 0x3, 0x2, 0x1, 0x3, 0x3, 0x0, 0x0, 0x3, 0x3, 0x1, 0x3, 0x0,
+                0x1, 0x0, 0x2, 0x3, 0x2, 0x3, 0x2, 0x0, 0x1, 0x3, 0x1, 0x0, 0x2, 0x0, 0x3, 0x0,
+            }, .{
+                0x3, 0x2, 0x1, 0x1, 0x2, 0x3, 0x1, 0x3, 0x1, 0x1, 0x3, 0x1, 0x2, 0x3, 0x3, 0x2,
+                0x2, 0x2, 0x1, 0x1, 0x1, 0x2, 0x1, 0x1, 0x2, 0x2, 0x2, 0x1, 0x3, 0x2, 0x3, 0x3,
+            });
+            try testArgs(@Vector(64, u2), .{
+                0x1, 0x3, 0x2, 0x1, 0x1, 0x3, 0x1, 0x3, 0x2, 0x1, 0x3, 0x2, 0x2, 0x2, 0x3, 0x0,
+                0x3, 0x1, 0x1, 0x0, 0x1, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x1, 0x3, 0x3, 0x1, 0x3,
+                0x1, 0x2, 0x3, 0x2, 0x3, 0x0, 0x1, 0x1, 0x2, 0x1, 0x0, 0x3, 0x2, 0x3, 0x3, 0x0,
+                0x0, 0x0, 0x3, 0x1, 0x3, 0x0, 0x1, 0x0, 0x2, 0x0, 0x3, 0x0, 0x1, 0x0, 0x3, 0x3,
+            }, .{
+                0x1, 0x2, 0x2, 0x1, 0x2, 0x1, 0x2, 0x3, 0x3, 0x2, 0x1, 0x2, 0x3, 0x1, 0x2, 0x3,
+                0x2, 0x2, 0x3, 0x1, 0x2, 0x2, 0x2, 0x1, 0x1, 0x2, 0x3, 0x3, 0x2, 0x3, 0x1, 0x1,
+                0x3, 0x2, 0x1, 0x1, 0x3, 0x1, 0x1, 0x1, 0x2, 0x1, 0x3, 0x1, 0x1, 0x3, 0x2, 0x2,
+                0x3, 0x2, 0x3, 0x3, 0x3, 0x1, 0x2, 0x1, 0x3, 0x1, 0x1, 0x2, 0x1, 0x3, 0x3, 0x1,
+            });
+            try testArgs(@Vector(128, u2), .{
+                0x2, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x3, 0x0, 0x1, 0x2, 0x0, 0x0, 0x2, 0x0, 0x1,
+                0x3, 0x0, 0x0, 0x1, 0x3, 0x0, 0x3, 0x0, 0x1, 0x1, 0x2, 0x3, 0x0, 0x1, 0x2, 0x1,
+                0x0, 0x0, 0x3, 0x3, 0x3, 0x2, 0x2, 0x1, 0x0, 0x3, 0x1, 0x1, 0x3, 0x3, 0x1, 0x0,
+                0x1, 0x2, 0x2, 0x1, 0x0, 0x1, 0x2, 0x2, 0x2, 0x1, 0x2, 0x3, 0x2, 0x0, 0x0, 0x0,
+                0x1, 0x3, 0x2, 0x3, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x1, 0x0, 0x2, 0x1, 0x0, 0x3,
+                0x2, 0x1, 0x3, 0x3, 0x1, 0x2, 0x1, 0x3, 0x3, 0x0, 0x1, 0x3, 0x2, 0x1, 0x0, 0x1,
+                0x1, 0x0, 0x2, 0x0, 0x2, 0x2, 0x2, 0x1, 0x3, 0x2, 0x2, 0x3, 0x2, 0x0, 0x0, 0x1,
+                0x1, 0x1, 0x1, 0x1, 0x0, 0x3, 0x1, 0x1, 0x2, 0x1, 0x0, 0x2, 0x3, 0x3, 0x1, 0x2,
+            }, .{
+                0x3, 0x2, 0x2, 0x1, 0x3, 0x3, 0x1, 0x3, 0x2, 0x3, 0x3, 0x1, 0x1, 0x1, 0x3, 0x1,
+                0x2, 0x2, 0x1, 0x3, 0x1, 0x2, 0x3, 0x3, 0x3, 0x3, 0x1, 0x2, 0x3, 0x1, 0x3, 0x3,
+                0x2, 0x1, 0x2, 0x3, 0x1, 0x2, 0x1, 0x2, 0x3, 0x3, 0x3, 0x1, 0x1, 0x2, 0x1, 0x3,
+                0x1, 0x3, 0x1, 0x3, 0x2, 0x2, 0x2, 0x1, 0x3, 0x2, 0x2, 0x2, 0x2, 0x3, 0x1, 0x2,
+                0x2, 0x2, 0x2, 0x3, 0x2, 0x2, 0x2, 0x2, 0x1, 0x2, 0x3, 0x3, 0x1, 0x3, 0x1, 0x2,
+                0x1, 0x1, 0x3, 0x2, 0x2, 0x1, 0x3, 0x2, 0x3, 0x1, 0x2, 0x2, 0x2, 0x2, 0x1, 0x1,
+                0x1, 0x3, 0x1, 0x3, 0x1, 0x3, 0x3, 0x3, 0x2, 0x3, 0x1, 0x1, 0x2, 0x2, 0x3, 0x1,
+                0x2, 0x2, 0x3, 0x3, 0x1, 0x1, 0x1, 0x1, 0x2, 0x1, 0x3, 0x1, 0x3, 0x2, 0x3, 0x1,
+            });
+
             // workaround https://github.com/ziglang/zig/issues/22914
-            // TODO: try testArgs(@Vector(16, i4), .{
-            //     0x4, 0x1, -0x7, -0x2, -0x7, 0x4, -0x4, -0x8, -0x1, 0x0, -0x8, 0x5, -0x5, 0x3, 0x3, 0x2,
+            // TODO: try testArgs(@Vector(1, i3), .{
+            //     -0x3,
+            // }, .{
+            //     -0x1,
+            // });
+            // try testArgs(@Vector(2, i3), .{
+            //     0x2, -0x3,
+            // }, .{
+            //     0x1, 0x3,
+            // });
+            // try testArgs(@Vector(4, i3), .{
+            //     0x1, -0x4, -0x2, -0x3,
+            // }, .{
+            //     -0x2, -0x4, 0x2, 0x2,
+            // });
+            // try testArgs(@Vector(8, i3), .{
+            //     0x0, 0x1, 0x3, 0x1, -0x3, 0x1, 0x3, 0x3,
+            // }, .{
+            //     -0x3, 0x2, 0x1, 0x1, -0x4, -0x1, 0x3, -0x2,
+            // });
+            // try testArgs(@Vector(16, i3), .{
+            //     -0x4, 0x3, -0x2, 0x0, -0x2, -0x1, 0x2, -0x4, 0x1, -0x3, 0x2, -0x2, 0x1, -0x2, 0x2, -0x4,
+            // }, .{
+            //     0x2, -0x3, 0x3, 0x1, -0x4, 0x1, -0x1, 0x1, -0x1, -0x3, -0x4, 0x2, 0x3, 0x3, -0x1, -0x4,
+            // });
+            // try testArgs(@Vector(32, i3), .{
+            //     0x1,  -0x3, -0x1, -0x3, -0x3, 0x2, 0x1, 0x0, 0x0, -0x1, 0x3,  -0x2, 0x3,  0x0, -0x3, 0x0,
+            //     -0x4, -0x2, -0x1, -0x4, -0x4, 0x2, 0x2, 0x3, 0x1, 0x2,  -0x4, -0x4, -0x3, 0x1, -0x1, -0x2,
+            // }, .{
+            //     -0x4, -0x2, 0x1, -0x1, 0x3,  0x1, 0x2,  -0x3, 0x2, -0x2, 0x1, -0x1, -0x2, -0x1, -0x1, -0x2,
+            //     -0x2, -0x3, 0x3, -0x3, -0x4, 0x1, -0x3, 0x3,  0x1, -0x3, 0x3, 0x3,  -0x4, 0x3,  0x2,  -0x2,
+            // });
+            // try testArgs(@Vector(64, i3), .{
+            //     0x1,  0x2,  0x1,  0x2,  -0x2, 0x2,  0x2,  -0x1, -0x4, 0x1,  0x3,  0x0,  -0x2, -0x2, 0x2,  -0x2,
+            //     0x0,  -0x4, -0x3, -0x4, -0x1, -0x1, 0x2,  0x2,  -0x2, -0x1, -0x1, 0x3,  0x3,  -0x4, 0x2,  0x0,
+            //     0x3,  0x2,  -0x4, -0x1, 0x1,  0x1,  0x3,  0x1,  0x2,  0x3,  -0x3, 0x1,  -0x4, -0x2, 0x1,  -0x3,
+            //     -0x3, -0x1, 0x1,  -0x3, -0x1, 0x3,  -0x4, -0x4, 0x0,  0x0,  -0x4, -0x2, 0x3,  -0x1, -0x3, -0x3,
+            // }, .{
+            //     0x3,  -0x2, 0x1,  -0x4, 0x1,  0x3,  -0x3, 0x1,  0x3,  0x1,  0x1,  -0x1, -0x2, 0x1,  -0x3, 0x1,
+            //     -0x2, -0x2, 0x3,  -0x3, -0x1, -0x3, 0x1,  -0x1, -0x3, -0x3, 0x1,  -0x2, 0x1,  -0x2, 0x1,  0x2,
+            //     0x3,  -0x4, -0x4, -0x1, 0x1,  0x3,  0x1,  0x1,  -0x4, 0x2,  -0x3, 0x3,  0x3,  -0x1, 0x1,  -0x1,
+            //     -0x2, 0x2,  0x2,  -0x4, -0x4, 0x3,  -0x2, -0x4, -0x1, -0x2, -0x4, 0x1,  0x2,  0x1,  -0x1, -0x2,
+            // });
+            // try testArgs(@Vector(128, i3), .{
+            //     -0x4, 0x1,  0x0,  -0x4, -0x4, 0x1,  -0x4, -0x2, 0x2,  0x2,  -0x3, 0x1,  0x2,  -0x2, 0x1,  0x1,
+            //     0x3,  0x0,  0x3,  -0x4, -0x1, 0x3,  0x3,  -0x4, 0x0,  -0x3, 0x2,  -0x2, 0x0,  0x3,  0x1,  -0x2,
+            //     -0x1, -0x1, 0x3,  -0x1, -0x2, 0x3,  0x3,  0x1,  -0x3, 0x1,  -0x1, -0x2, -0x4, 0x2,  -0x2, -0x1,
+            //     0x1,  0x1,  0x1,  0x0,  -0x1, 0x2,  -0x1, 0x3,  0x2,  -0x1, -0x2, 0x3,  -0x2, 0x3,  0x3,  0x1,
+            //     0x3,  -0x3, -0x4, -0x1, 0x2,  0x2,  -0x2, 0x3,  -0x4, -0x2, -0x1, 0x0,  0x1,  -0x1, 0x0,  0x0,
+            //     -0x2, 0x3,  0x0,  -0x3, -0x4, 0x2,  -0x3, -0x2, -0x4, 0x0,  -0x3, -0x4, -0x4, -0x2, -0x1, -0x3,
+            //     0x0,  -0x1, 0x0,  -0x1, 0x2,  -0x4, -0x3, 0x0,  -0x4, 0x0,  -0x2, 0x1,  -0x2, -0x4, -0x1, -0x1,
+            //     -0x3, 0x3,  -0x1, -0x1, -0x2, 0x1,  0x3,  0x1,  -0x3, 0x1,  -0x4, -0x2, 0x0,  -0x1, -0x2, 0x2,
             // }, .{
-            //     0x7, -0x7, -0x6, -0x1, 0x3, -0x5, -0x3, -0x6, 0x4, 0x4, -0x2, 0x7, -0x2, 0x6, -0x4, -0x1,
+            //     -0x3, 0x2,  -0x3, 0x1,  -0x2, -0x1, -0x3, 0x1,  0x2,  0x2,  -0x2, 0x2,  0x2,  0x1,  0x3,  -0x1,
+            //     -0x4, -0x3, 0x2,  -0x3, -0x2, 0x3,  -0x3, 0x2,  -0x1, -0x3, 0x1,  0x2,  -0x4, 0x2,  -0x2, -0x3,
+            //     0x1,  -0x1, 0x2,  0x2,  -0x1, -0x3, -0x4, 0x2,  0x1,  -0x4, 0x1,  -0x4, 0x2,  -0x1, 0x2,  -0x2,
+            //     0x2,  0x1,  -0x4, 0x3,  0x1,  -0x2, -0x3, -0x4, 0x3,  -0x1, 0x3,  -0x4, -0x2, 0x1,  -0x2, 0x3,
+            //     0x1,  0x1,  0x2,  0x1,  -0x1, -0x2, 0x2,  -0x1, 0x1,  -0x1, -0x3, -0x1, 0x1,  -0x4, -0x1, -0x1,
+            //     -0x3, -0x1, -0x4, 0x3,  0x1,  -0x1, -0x1, -0x1, 0x1,  -0x4, 0x1,  -0x2, -0x4, 0x2,  -0x4, -0x3,
+            //     0x2,  -0x4, -0x1, 0x1,  0x3,  0x2,  -0x1, 0x3,  0x2,  0x2,  0x1,  -0x4, -0x3, 0x1,  -0x1, 0x1,
+            //     -0x2, -0x4, 0x1,  0x3,  -0x1, 0x3,  0x1,  0x2,  -0x4, 0x2,  0x2,  -0x3, -0x3, -0x4, -0x2, 0x3,
             // });
+            try testArgs(@Vector(1, i3), .{
+                -0x1,
+            }, .{
+                -0x3,
+            });
+            try testArgs(@Vector(2, i3), .{
+                0x2, -0x2,
+            }, .{
+                0x3, 0x3,
+            });
+            try testArgs(@Vector(4, i3), .{
+                0x1, 0x0, -0x2, 0x0,
+            }, .{
+                0x2, -0x2, -0x4, 0x3,
+            });
+            try testArgs(@Vector(8, i3), .{
+                0x3, -0x1, -0x3, -0x3, -0x3, -0x2, 0x2, -0x3,
+            }, .{
+                -0x1, 0x1, -0x2, 0x2, 0x1, -0x2, -0x3, -0x1,
+            });
+            try testArgs(@Vector(16, i3), .{
+                -0x1, -0x1, 0x3, -0x2, -0x2, -0x2, -0x2, -0x1, 0x2, 0x1, -0x2, 0x2, -0x2, 0x1, 0x1, -0x3,
+            }, .{
+                0x1, -0x1, -0x1, -0x1, 0x1, 0x3, -0x1, -0x1, 0x1, 0x3, -0x1, -0x3, 0x1, 0x3, 0x1, -0x2,
+            });
+            try testArgs(@Vector(32, i3), .{
+                0x3, -0x1, -0x3, 0x1,  0x0, 0x0, -0x3, -0x1, -0x3, 0x1,  0x1, 0x2, -0x1, 0x1, -0x1, -0x1,
+                0x1, -0x3, 0x1,  -0x1, 0x1, 0x0, -0x2, 0x3,  -0x2, -0x3, 0x3, 0x1, -0x1, 0x0, 0x3,  -0x1,
+            }, .{
+                0x1, -0x2, -0x4, -0x4, 0x2,  0x1, -0x4, -0x3, -0x4, 0x1, -0x3, -0x4, -0x1, 0x2,  -0x1, 0x1,
+                0x2, -0x2, -0x4, 0x3,  -0x3, 0x1, 0x2,  -0x2, -0x2, 0x3, 0x3,  0x2,  -0x2, -0x4, -0x2, -0x3,
+            });
+            try testArgs(@Vector(64, i3), .{
+                -0x2, 0x1,  0x3,  -0x1, -0x3, -0x1, 0x1,  -0x3, 0x1,  -0x2, 0x0,  0x3,  0x0,  -0x2, 0x1,  -0x3,
+                -0x2, 0x1,  -0x2, 0x2,  0x0,  0x1,  -0x3, 0x0,  -0x3, 0x0,  -0x3, -0x2, -0x2, -0x3, -0x2, 0x3,
+                -0x1, 0x1,  0x1,  0x1,  -0x1, 0x0,  -0x3, -0x3, -0x3, -0x3, -0x2, 0x2,  0x3,  0x2,  -0x3, 0x1,
+                -0x3, -0x3, -0x1, -0x1, -0x1, -0x3, -0x2, -0x2, 0x2,  0x3,  0x3,  -0x3, 0x0,  0x3,  -0x3, -0x3,
+            }, .{
+                0x2,  0x1,  -0x1, -0x3, -0x1, -0x4, -0x2, -0x3, 0x2, -0x2, -0x4, 0x2,  0x2,  -0x2, 0x1,  0x1,
+                0x1,  -0x1, -0x3, 0x2,  -0x2, 0x2,  -0x1, 0x3,  0x3, -0x1, -0x1, -0x3, -0x1, 0x2,  -0x2, 0x2,
+                0x1,  -0x4, -0x2, -0x4, 0x2,  -0x1, -0x2, 0x3,  0x3, 0x3,  -0x4, -0x2, 0x3,  -0x4, 0x1,  -0x2,
+                -0x2, 0x2,  -0x3, -0x3, 0x1,  -0x3, 0x3,  0x2,  0x1, 0x3,  -0x3, 0x3,  0x2,  -0x2, -0x2, 0x2,
+            });
+            try testArgs(@Vector(128, i3), .{
+                -0x1, 0x2,  0x1,  0x2,  0x1,  0x1,  0x2,  0x1,  -0x3, 0x1,  -0x2, -0x1, 0x0, 0x2,  0x3,  -0x3,
+                -0x1, -0x1, -0x2, 0x3,  0x1,  0x2,  0x2,  0x3,  -0x2, 0x3,  -0x2, -0x3, 0x1, 0x0,  -0x1, -0x2,
+                -0x1, -0x1, 0x1,  -0x1, 0x2,  -0x3, 0x1,  -0x2, -0x1, 0x2,  -0x2, 0x1,  0x1, -0x3, -0x2, 0x3,
+                -0x1, -0x1, 0x1,  -0x3, -0x1, -0x2, 0x0,  0x1,  -0x1, 0x0,  -0x1, 0x2,  0x1, 0x2,  0x3,  -0x1,
+                0x0,  0x1,  0x2,  0x3,  -0x2, 0x2,  -0x3, 0x2,  0x0,  -0x2, -0x3, -0x2, 0x0, 0x1,  0x2,  0x1,
+                -0x3, 0x0,  -0x2, -0x3, -0x1, 0x2,  -0x3, 0x2,  0x3,  0x1,  0x1,  -0x1, 0x2, 0x1,  -0x1, 0x2,
+                0x2,  0x0,  0x1,  -0x3, -0x3, 0x1,  0x2,  0x0,  0x1,  0x0,  0x0,  -0x1, 0x0, 0x0,  0x3,  0x3,
+                0x0,  -0x2, -0x2, -0x2, 0x3,  0x0,  -0x1, -0x2, 0x0,  0x3,  -0x3, 0x1,  0x2, -0x1, 0x1,  0x1,
+            }, .{
+                0x3,  0x3,  0x2,  0x3,  -0x3, -0x4, 0x1,  -0x3, 0x2,  0x3,  0x3,  -0x2, 0x2,  -0x1, 0x1,  0x3,
+                -0x4, -0x4, -0x1, -0x1, 0x2,  -0x3, -0x3, -0x3, -0x2, -0x3, -0x2, -0x1, 0x1,  0x3,  -0x4, -0x2,
+                -0x3, 0x1,  0x3,  0x1,  -0x2, 0x1,  0x1,  0x2,  0x1,  0x2,  -0x4, -0x4, -0x3, -0x2, -0x1, -0x2,
+                0x3,  0x2,  -0x3, -0x2, -0x2, 0x3,  -0x3, -0x4, 0x3,  -0x3, 0x3,  -0x1, 0x3,  -0x1, -0x1, 0x2,
+                0x3,  -0x4, -0x4, -0x2, -0x4, 0x2,  -0x2, -0x4, -0x2, -0x2, -0x3, 0x3,  0x2,  0x2,  0x2,  -0x4,
+                0x1,  -0x4, -0x3, -0x2, -0x4, -0x4, 0x1,  -0x4, -0x1, 0x3,  -0x2, -0x4, 0x3,  -0x4, 0x3,  -0x1,
+                0x3,  0x1,  -0x1, 0x1,  0x1,  0x2,  0x1,  -0x3, -0x4, -0x1, -0x1, -0x2, -0x3, -0x2, 0x3,  -0x2,
+                -0x1, 0x3,  0x3,  -0x1, 0x3,  0x1,  0x1,  0x2,  -0x3, 0x3,  -0x1, 0x3,  -0x3, -0x4, -0x4, 0x1,
+            });
+
+            try testArgs(@Vector(1, u3), .{
+                0x5,
+            }, .{
+                0x2,
+            });
+            try testArgs(@Vector(2, u3), .{
+                0x4, 0x5,
+            }, .{
+                0x2, 0x4,
+            });
+            try testArgs(@Vector(4, u3), .{
+                0x7, 0x7, 0x2, 0x3,
+            }, .{
+                0x4, 0x5, 0x7, 0x1,
+            });
+            try testArgs(@Vector(8, u3), .{
+                0x1, 0x5, 0x3, 0x7, 0x2, 0x5, 0x4, 0x7,
+            }, .{
+                0x5, 0x2, 0x3, 0x5, 0x5, 0x1, 0x3, 0x1,
+            });
+            try testArgs(@Vector(16, u3), .{
+                0x6, 0x5, 0x7, 0x4, 0x7, 0x2, 0x2, 0x3, 0x7, 0x6, 0x6, 0x5, 0x6, 0x4, 0x7, 0x5,
+            }, .{
+                0x6, 0x3, 0x5, 0x7, 0x4, 0x4, 0x4, 0x4, 0x6, 0x5, 0x3, 0x7, 0x4, 0x3, 0x3, 0x2,
+            });
+            try testArgs(@Vector(32, u3), .{
+                0x0, 0x6, 0x4, 0x3, 0x2, 0x4, 0x7, 0x5, 0x7, 0x5, 0x0, 0x6, 0x7, 0x2, 0x2, 0x2,
+                0x6, 0x2, 0x6, 0x5, 0x2, 0x3, 0x1, 0x0, 0x7, 0x1, 0x7, 0x0, 0x3, 0x1, 0x6, 0x2,
+            }, .{
+                0x2, 0x5, 0x3, 0x2, 0x2, 0x2, 0x5, 0x4, 0x4, 0x1, 0x7, 0x2, 0x2, 0x2, 0x5, 0x1,
+                0x2, 0x4, 0x3, 0x5, 0x5, 0x1, 0x5, 0x4, 0x7, 0x5, 0x4, 0x3, 0x1, 0x7, 0x5, 0x6,
+            });
+            try testArgs(@Vector(64, u3), .{
+                0x2, 0x3, 0x1, 0x0, 0x5, 0x6, 0x1, 0x2, 0x2, 0x3, 0x1, 0x1, 0x5, 0x2, 0x2, 0x5,
+                0x0, 0x0, 0x1, 0x1, 0x0, 0x6, 0x5, 0x2, 0x7, 0x3, 0x1, 0x1, 0x1, 0x0, 0x4, 0x7,
+                0x2, 0x6, 0x4, 0x0, 0x1, 0x1, 0x6, 0x5, 0x2, 0x0, 0x3, 0x4, 0x1, 0x4, 0x5, 0x2,
+                0x7, 0x4, 0x6, 0x6, 0x0, 0x2, 0x6, 0x2, 0x4, 0x6, 0x6, 0x5, 0x7, 0x0, 0x3, 0x6,
+            }, .{
+                0x7, 0x3, 0x3, 0x2, 0x6, 0x4, 0x3, 0x3, 0x7, 0x2, 0x3, 0x4, 0x7, 0x5, 0x2, 0x4,
+                0x6, 0x3, 0x6, 0x1, 0x7, 0x4, 0x1, 0x6, 0x7, 0x3, 0x1, 0x3, 0x6, 0x6, 0x5, 0x5,
+                0x2, 0x5, 0x7, 0x7, 0x4, 0x2, 0x2, 0x7, 0x4, 0x6, 0x6, 0x6, 0x4, 0x6, 0x2, 0x4,
+                0x3, 0x2, 0x2, 0x1, 0x7, 0x7, 0x4, 0x4, 0x2, 0x4, 0x7, 0x6, 0x7, 0x2, 0x2, 0x3,
+            });
+            try testArgs(@Vector(128, u3), .{
+                0x5, 0x2, 0x5, 0x4, 0x6, 0x0, 0x7, 0x2, 0x0, 0x6, 0x7, 0x4, 0x6, 0x4, 0x2, 0x6,
+                0x7, 0x3, 0x5, 0x6, 0x4, 0x5, 0x3, 0x0, 0x1, 0x5, 0x2, 0x0, 0x7, 0x2, 0x7, 0x5,
+                0x4, 0x6, 0x5, 0x4, 0x4, 0x3, 0x5, 0x7, 0x0, 0x2, 0x0, 0x6, 0x6, 0x1, 0x3, 0x3,
+                0x3, 0x7, 0x3, 0x3, 0x1, 0x0, 0x5, 0x3, 0x0, 0x0, 0x5, 0x5, 0x2, 0x4, 0x7, 0x4,
+                0x4, 0x1, 0x5, 0x0, 0x3, 0x2, 0x1, 0x3, 0x7, 0x3, 0x1, 0x4, 0x3, 0x1, 0x3, 0x2,
+                0x5, 0x7, 0x7, 0x2, 0x3, 0x7, 0x1, 0x1, 0x0, 0x7, 0x2, 0x5, 0x7, 0x0, 0x1, 0x4,
+                0x5, 0x6, 0x0, 0x1, 0x1, 0x4, 0x7, 0x5, 0x2, 0x3, 0x7, 0x7, 0x1, 0x3, 0x6, 0x4,
+                0x6, 0x0, 0x1, 0x0, 0x3, 0x7, 0x5, 0x4, 0x7, 0x4, 0x6, 0x5, 0x6, 0x6, 0x7, 0x4,
+            }, .{
+                0x5, 0x7, 0x5, 0x1, 0x7, 0x1, 0x3, 0x5, 0x1, 0x4, 0x3, 0x2, 0x5, 0x5, 0x2, 0x1,
+                0x4, 0x2, 0x2, 0x2, 0x5, 0x7, 0x1, 0x6, 0x2, 0x5, 0x2, 0x7, 0x2, 0x7, 0x4, 0x4,
+                0x1, 0x5, 0x4, 0x3, 0x2, 0x1, 0x1, 0x6, 0x3, 0x4, 0x7, 0x2, 0x7, 0x4, 0x1, 0x4,
+                0x3, 0x5, 0x3, 0x4, 0x6, 0x3, 0x7, 0x6, 0x6, 0x1, 0x7, 0x6, 0x3, 0x3, 0x5, 0x5,
+                0x7, 0x1, 0x1, 0x3, 0x3, 0x3, 0x1, 0x1, 0x1, 0x2, 0x6, 0x5, 0x3, 0x7, 0x1, 0x1,
+                0x5, 0x3, 0x1, 0x2, 0x7, 0x2, 0x5, 0x6, 0x4, 0x7, 0x3, 0x6, 0x5, 0x4, 0x3, 0x3,
+                0x5, 0x3, 0x7, 0x2, 0x3, 0x3, 0x7, 0x3, 0x1, 0x5, 0x3, 0x4, 0x7, 0x7, 0x5, 0x7,
+                0x1, 0x1, 0x2, 0x7, 0x2, 0x5, 0x1, 0x6, 0x4, 0x6, 0x1, 0x6, 0x5, 0x1, 0x2, 0x1,
+            });
+
+            // workaround https://github.com/ziglang/zig/issues/22914
+            // TODO: try testArgs(@Vector(1, i4), .{
+            //     0x2,
+            // }, .{
+            //     0x1,
+            // });
+            // try testArgs(@Vector(2, i4), .{
+            //     -0x2, 0x5,
+            // }, .{
+            //     -0x1, 0x2,
+            // });
+            // try testArgs(@Vector(4, i4), .{
+            //     -0x8, 0x5, 0x5, -0x2,
+            // }, .{
+            //     -0x3, -0x7, -0x4, -0x5,
+            // });
+            // try testArgs(@Vector(8, i4), .{
+            //     0x7, 0x3, 0x2, -0x1, -0x8, -0x2, 0x7, 0x1,
+            // }, .{
+            //     -0x2, 0x4, -0x8, 0x7, 0x1, -0x5, 0x6, -0x7,
+            // });
+            // try testArgs(@Vector(16, i4), .{
+            //     0x6, -0x3, 0x6, 0x6, -0x5, 0x6, 0x3, 0x7, -0x6, 0x7, -0x7, 0x6, -0x2, -0x2, -0x5, 0x0,
+            // }, .{
+            //     0x2, -0x3, -0x4, -0x5, 0x3, 0x3, -0x5, 0x5, 0x4, -0x1, -0x6, 0x4, 0x7, -0x2, 0x3, 0x2,
+            // });
+            // try testArgs(@Vector(32, i4), .{
+            //     -0x1, -0x4, 0x6,  0x6, 0x5,  0x3,  0x4, 0x0, 0x3, 0x7,  -0x6, 0x7, -0x2, -0x7, -0x4, 0x6,
+            //     0x3,  -0x7, -0x5, 0x1, -0x7, -0x6, 0x1, 0x3, 0x7, -0x8, -0x5, 0x6, -0x5, 0x0,  0x0,  -0x8,
+            // }, .{
+            //     -0x4, -0x4, -0x4, 0x4,  -0x5, 0x3, -0x1, 0x6,  0x1,  -0x3, -0x1, 0x6,  0x5,  -0x8, 0x1, -0x4,
+            //     -0x1, -0x4, 0x1,  -0x3, 0x4,  0x6, -0x3, -0x8, -0x7, -0x4, 0x2,  -0x3, -0x1, -0x2, 0x6, -0x6,
+            // });
+            // try testArgs(@Vector(64, i4), .{
+            //     0x0,  -0x3, -0x3, 0x5,  0x2,  -0x1, 0x4,  0x5,  0x6,  -0x2, 0x1,  0x5,  -0x3, -0x1, -0x2, -0x1,
+            //     -0x8, 0x2,  -0x1, -0x2, 0x7,  -0x3, -0x2, -0x3, 0x1,  -0x5, 0x5,  0x2,  -0x1, -0x6, -0x2, -0x1,
+            //     -0x2, -0x5, 0x0,  0x6,  0x3,  -0x4, -0x5, -0x5, -0x4, -0x7, -0x4, 0x1,  0x0,  -0x6, -0x7, -0x6,
+            //     0x1,  -0x6, 0x4,  -0x4, -0x2, 0x6,  -0x7, 0x4,  0x4,  0x5,  0x3,  -0x6, -0x8, -0x5, 0x5,  -0x7,
+            // }, .{
+            //     -0x1, -0x5, 0x5,  -0x2, 0x6,  -0x6, -0x4, -0x5, -0x4, 0x7,  -0x6, 0x7,  0x4,  -0x5, 0x5,  0x7,
+            //     -0x6, 0x3,  -0x4, 0x2,  -0x8, 0x4,  -0x2, 0x5,  -0x5, -0x5, -0x8, 0x3,  -0x1, -0x4, -0x8, -0x2,
+            //     -0x2, 0x5,  -0x7, -0x3, 0x2,  -0x5, -0x6, -0x7, -0x8, -0x2, 0x5,  -0x3, 0x2,  -0x1, -0x7, -0x4,
+            //     -0x3, -0x3, 0x6,  -0x8, 0x3,  -0x4, 0x7,  0x3,  -0x2, 0x7,  -0x1, 0x1,  0x1,  0x6,  -0x2, -0x2,
+            // });
+            // try testArgs(@Vector(128, i4), .{
+            //     -0x1, -0x3, -0x3, -0x4, 0x3, -0x4, 0x0,  -0x4, 0x7,  0x3,  0x5,  -0x4, -0x5, -0x4, -0x2, -0x7,
+            //     0x2,  0x0,  -0x4, 0x7,  0x3, -0x5, 0x4,  0x5,  -0x2, -0x3, -0x4, 0x6,  -0x7, -0x1, 0x1,  -0x6,
+            //     0x1,  0x5,  0x2,  0x5,  0x2, 0x2,  -0x4, -0x4, 0x5,  0x2,  -0x2, -0x8, -0x1, -0x2, 0x5,  0x3,
+            //     0x0,  -0x5, 0x5,  0x7,  0x6, -0x3, -0x2, 0x0,  -0x7, -0x7, -0x4, 0x2,  -0x4, 0x7,  0x1,  -0x5,
+            //     -0x4, -0x8, 0x2,  -0x7, 0x3, -0x4, 0x7,  0x6,  -0x7, -0x3, -0x7, 0x2,  0x4,  0x2,  -0x5, -0x6,
+            //     0x3,  0x5,  0x1,  0x6,  0x5, 0x7,  0x7,  -0x4, -0x7, -0x1, 0x0,  -0x7, 0x6,  0x0,  0x6,  0x0,
+            //     0x0,  -0x5, -0x1, -0x8, 0x7, -0x6, -0x5, -0x2, -0x4, 0x1,  0x1,  -0x8, 0x2,  0x6,  -0x1, -0x3,
+            //     -0x6, 0x5,  -0x8, 0x3,  0x3, 0x1,  -0x1, -0x3, -0x3, -0x6, 0x7,  -0x6, -0x8, 0x1,  -0x7, -0x8,
+            // }, .{
+            //     0x1,  0x3,  0x1,  0x3,  -0x6, 0x6,  0x2,  -0x3, 0x1,  -0x7, 0x7,  -0x3, -0x1, -0x1, 0x7,  0x2,
+            //     -0x8, 0x2,  -0x3, -0x4, 0x4,  -0x4, 0x7,  0x6,  -0x5, -0x2, -0x1, 0x6,  -0x7, 0x4,  0x7,  -0x3,
+            //     -0x5, -0x8, -0x5, -0x6, -0x6, 0x2,  0x1,  -0x8, 0x4,  0x3,  -0x5, 0x7,  -0x8, 0x3,  -0x1, 0x7,
+            //     -0x3, 0x7,  -0x3, -0x2, -0x6, 0x4,  0x2,  -0x2, -0x2, -0x7, 0x5,  -0x1, -0x6, 0x7,  -0x5, 0x5,
+            //     0x4,  0x5,  -0x8, -0x5, 0x6,  0x1,  -0x5, 0x7,  -0x6, -0x3, -0x4, 0x6,  -0x8, 0x7,  0x7,  -0x6,
+            //     0x6,  -0x4, 0x2,  -0x8, -0x8, -0x4, -0x8, -0x3, 0x6,  0x5,  0x7,  -0x6, 0x1,  0x2,  -0x7, -0x3,
+            //     -0x3, 0x1,  -0x3, 0x3,  -0x1, 0x3,  -0x7, -0x8, 0x1,  -0x3, -0x3, -0x3, -0x4, 0x5,  0x7,  -0x7,
+            //     0x3,  0x2,  0x6,  -0x2, -0x4, -0x3, -0x1, 0x5,  -0x6, 0x2,  0x3,  -0x5, 0x5,  -0x3, -0x2, -0x8,
+            // });
+            try testArgs(@Vector(1, i4), .{
+                -0x1,
+            }, .{
+                -0x1,
+            });
+            try testArgs(@Vector(2, i4), .{
+                0x7, 0x5,
+            }, .{
+                0x4, 0x1,
+            });
+            try testArgs(@Vector(4, i4), .{
+                -0x2, -0x3, -0x6, 0x1,
+            }, .{
+                0x3, -0x5, -0x1, -0x3,
+            });
+            try testArgs(@Vector(8, i4), .{
+                -0x5, -0x3, -0x2, 0x2, -0x4, -0x4, 0x0, 0x1,
+            }, .{
+                -0x7, -0x2, -0x7, 0x1, 0x6, 0x2, -0x7, 0x7,
+            });
             try testArgs(@Vector(16, i4), .{
-                0x7, -0x7, -0x6, -0x1, 0x3, -0x5, -0x3, -0x6, 0x4, 0x4, -0x2, 0x7, -0x2, 0x6, -0x4, -0x1,
+                -0x1, -0x1, 0x6, 0x3, -0x1, 0x2, -0x6, 0x6, 0x1, 0x5, -0x1, 0x7, 0x7, -0x3, -0x1, 0x4,
             }, .{
-                0x4, 0x1, -0x7, -0x2, -0x7, 0x4, -0x4, -0x8, -0x1, 0x1, -0x8, 0x5, -0x5, 0x3, 0x3, 0x2,
+                0x1, -0x3, 0x4, -0x1, 0x2, 0x5, -0x3, -0x5, -0x5, 0x5, -0x7, 0x7, 0x1, 0x5, -0x5, -0x6,
             });
             try testArgs(@Vector(32, i4), .{
-                0x0, 0x4,  0x0,  -0x6, -0x7, 0x4, -0x3, 0x4, -0x5, 0x2,  0x3,  0x2,  -0x6, -0x4, -0x4, -0x3,
-                0x7, -0x5, -0x3, 0x2,  -0x4, 0x4, -0x1, 0x6, -0x7, -0x1, -0x6, -0x2, -0x4, -0x2, 0x5,  0x0,
+                -0x7, 0x4, -0x5, -0x5, -0x5, 0x1,  0x7,  0x3,  -0x2, 0x7, -0x3, -0x7, -0x3, -0x2, -0x5, -0x7,
+                0x5,  0x4, 0x3,  0x0,  -0x6, -0x2, -0x4, -0x1, 0x5,  0x4, -0x7, -0x7, -0x5, 0x5,  -0x5, -0x1,
+            }, .{
+                0x4, 0x4,  0x5,  0x2,  0x3, -0x4, 0x6,  0x2, -0x7, 0x3, 0x7, 0x2,  0x6, 0x2, 0x2, 0x3,
+                0x7, -0x3, -0x7, -0x2, 0x3, -0x1, -0x4, 0x4, -0x8, 0x1, 0x6, -0x7, 0x5, 0x1, 0x7, -0x2,
+            });
+            try testArgs(@Vector(64, i4), .{
+                0x0,  0x3, 0x7,  -0x7, 0x1, -0x5, -0x4, 0x2,  0x2,  0x5,  0x4,  0x0,  -0x3, -0x4, -0x4, -0x4,
+                -0x7, 0x6, 0x2,  0x1,  0x0, 0x3,  -0x6, 0x4,  -0x4, 0x2,  0x7,  0x3,  -0x4, -0x3, -0x3, -0x3,
+                -0x4, 0x0, -0x3, -0x6, 0x0, 0x1,  -0x5, -0x7, -0x2, -0x1, -0x1, -0x7, 0x3,  -0x5, -0x4, -0x3,
+                0x5,  0x1, -0x4, 0x7,  0x1, 0x7,  -0x5, 0x4,  0x5,  -0x4, 0x1,  -0x4, 0x4,  0x5,  0x4,  -0x2,
+            }, .{
+                0x2,  -0x4, -0x1, 0x4,  0x4, -0x1, -0x3, 0x6,  0x4,  0x2,  0x4,  0x3,  -0x6, -0x7, -0x4, 0x6,
+                0x6,  0x4,  0x6,  -0x7, 0x2, 0x4,  -0x8, -0x1, -0x8, 0x3,  -0x2, -0x7, 0x1,  0x5,  -0x3, -0x6,
+                -0x8, 0x2,  -0x5, 0x7,  0x4, 0x7,  -0x4, 0x3,  0x6,  -0x7, -0x1, 0x1,  -0x8, -0x2, -0x3, 0x3,
+                0x3,  -0x4, 0x4,  -0x7, 0x5, -0x4, -0x1, 0x2,  0x6,  -0x2, -0x5, 0x2,  0x7,  0x2,  -0x8, -0x3,
+            });
+            try testArgs(@Vector(128, i4), .{
+                0x3,  0x3,  -0x6, 0x3,  0x2,  0x1,  0x5,  -0x2, 0x0,  -0x7, 0x4,  0x3,  0x0,  -0x3, -0x7, -0x5,
+                0x4,  0x0,  0x1,  0x6,  0x3,  0x3,  -0x4, 0x7,  -0x6, -0x6, 0x6,  0x1,  0x6,  -0x5, -0x6, -0x6,
+                0x4,  -0x2, 0x3,  -0x1, -0x4, -0x6, -0x1, 0x2,  0x4,  0x3,  -0x2, 0x2,  0x3,  0x5,  0x0,  -0x2,
+                0x5,  0x6,  -0x1, -0x5, 0x6,  0x7,  0x7,  -0x6, -0x6, 0x0,  0x0,  -0x6, -0x2, -0x7, -0x6, 0x3,
+                -0x6, -0x6, 0x4,  -0x4, 0x4,  -0x5, 0x0,  0x6,  0x6,  0x7,  0x4,  -0x5, 0x5,  -0x7, -0x7, 0x6,
+                -0x4, 0x4,  -0x7, 0x4,  0x3,  0x2,  -0x4, -0x1, -0x6, -0x2, 0x5,  -0x2, 0x4,  -0x7, -0x3, -0x3,
+                0x4,  0x1,  -0x1, 0x2,  -0x1, 0x4,  -0x5, -0x3, -0x1, -0x3, 0x3,  0x7,  0x5,  -0x1, -0x1, -0x4,
+                0x2,  0x1,  -0x3, 0x7,  -0x6, -0x7, -0x6, -0x4, -0x5, -0x2, 0x5,  0x0,  -0x5, 0x7,  -0x7, 0x5,
             }, .{
-                0x5,  0x1, 0x5, 0x7, 0x1, -0x3, 0x3,  0x3, 0x5, 0x4,  0x1, 0x5, 0x4,  -0x8, -0x3, -0x6,
-                -0x2, 0x3, 0x1, 0x2, 0x4, 0x4,  -0x8, 0x2, 0x6, -0x1, 0x1, 0x3, -0x1, -0x3, 0x7,  -0x7,
+                -0x6, -0x5, -0x1, 0x4,  0x6,  0x4,  -0x1, -0x7, -0x3, 0x4,  -0x6, -0x2, -0x3, 0x1,  0x6,  0x2,
+                -0x1, -0x3, -0x4, 0x2,  0x3,  0x6,  -0x7, 0x3,  0x7,  -0x6, 0x1,  -0x7, -0x5, 0x6,  0x7,  -0x4,
+                0x7,  -0x4, 0x2,  -0x5, 0x4,  0x5,  0x5,  -0x3, -0x1, 0x6,  -0x6, -0x2, -0x4, 0x3,  -0x4, -0x4,
+                -0x5, 0x6,  -0x6, 0x3,  -0x6, -0x3, 0x6,  0x5,  0x6,  -0x6, -0x3, 0x1,  -0x4, -0x5, -0x8, -0x3,
+                -0x4, 0x7,  -0x2, -0x1, 0x4,  0x2,  -0x3, 0x6,  0x4,  0x6,  -0x3, 0x6,  0x5,  -0x2, -0x1, 0x5,
+                0x6,  -0x6, -0x8, -0x2, -0x3, 0x6,  -0x6, 0x5,  -0x2, -0x6, 0x5,  -0x6, 0x2,  0x1,  0x3,  -0x3,
+                0x7,  0x7,  0x3,  -0x1, 0x5,  -0x2, -0x3, 0x4,  0x7,  -0x7, 0x7,  0x6,  -0x2, -0x5, 0x4,  -0x5,
+                -0x1, -0x2, 0x1,  0x4,  -0x2, 0x1,  -0x3, 0x1,  -0x4, 0x3,  -0x2, -0x6, -0x4, -0x2, -0x8, 0x2,
             });
 
             try testArgs(@Vector(1, u4), .{
-                0xe,
+                0x2,
             }, .{
-                0xc,
+                0xa,
             });
             try testArgs(@Vector(2, u4), .{
-                0x2, 0x5,
+                0x0, 0xa,
             }, .{
-                0x9, 0xe,
+                0xb, 0xa,
             });
             try testArgs(@Vector(4, u4), .{
-                0x2, 0xb, 0xc, 0x7,
+                0xb, 0x7, 0x0, 0xd,
             }, .{
-                0x2, 0xa, 0x8, 0x1,
+                0x4, 0x5, 0xf, 0x3,
             });
             try testArgs(@Vector(8, u4), .{
-                0xf, 0x9, 0x0, 0x6, 0x8, 0x7, 0xd, 0x7,
+                0x9, 0xf, 0x0, 0x5, 0x4, 0x9, 0x3, 0x7,
             }, .{
-                0xb, 0xb, 0x3, 0x6, 0x1, 0x5, 0x4, 0xd,
+                0xc, 0x6, 0x8, 0x8, 0x9, 0x8, 0x9, 0x2,
             });
             try testArgs(@Vector(16, u4), .{
-                0x5, 0x1, 0xa, 0x6, 0xb, 0x3, 0x0, 0x7, 0x8, 0x0, 0x9, 0xe, 0x2, 0x9, 0x2, 0x5,
+                0x0, 0xb, 0xd, 0x2, 0x8, 0xa, 0x6, 0x7, 0xa, 0xf, 0xf, 0x4, 0x9, 0x9, 0x9, 0xf,
             }, .{
-                0x4, 0x9, 0x4, 0x8, 0x5, 0x7, 0xf, 0x8, 0x3, 0xc, 0x6, 0x9, 0xd, 0xd, 0x2, 0xd,
+                0xd, 0x1, 0xf, 0x8, 0xb, 0xa, 0xe, 0x4, 0x5, 0x3, 0xd, 0x4, 0x1, 0xd, 0xd, 0xe,
             });
             try testArgs(@Vector(32, u4), .{
-                0xa, 0x5, 0xd, 0x4, 0xe, 0xf, 0xf, 0x2, 0xb, 0x3, 0x9, 0x2, 0x1, 0x9, 0x6, 0x8,
-                0x7, 0xc, 0x3, 0x5, 0x4, 0xb, 0x5, 0x4, 0x8, 0x2, 0x5, 0x9, 0xf, 0x6, 0x7, 0x7,
+                0x3, 0xc, 0x5, 0x1, 0xa, 0x6, 0x7, 0xe, 0x5, 0x8, 0x5, 0x6, 0xe, 0x0, 0xe, 0x6,
+                0x9, 0x5, 0x3, 0x6, 0xd, 0xe, 0x9, 0x4, 0xf, 0x1, 0x1, 0x5, 0x0, 0x2, 0xa, 0x0,
+            }, .{
+                0xd, 0x8, 0x7, 0xe, 0xa, 0x2, 0x5, 0x8, 0x5, 0x1, 0xa, 0x8, 0x1, 0x8, 0xb, 0x3,
+                0xb, 0xe, 0x5, 0xf, 0xb, 0x8, 0xd, 0x7, 0x6, 0x4, 0x7, 0x5, 0x5, 0x7, 0xf, 0x6,
+            });
+            try testArgs(@Vector(64, u4), .{
+                0x5, 0xd, 0x0, 0xd, 0x1, 0xb, 0xb, 0xe, 0xb, 0x7, 0xa, 0xc, 0xb, 0xe, 0x8, 0x9,
+                0x1, 0xb, 0x9, 0x5, 0xa, 0x6, 0xc, 0x5, 0x1, 0xe, 0x5, 0xb, 0x2, 0x8, 0x1, 0x4,
+                0x2, 0x6, 0x5, 0x1, 0x0, 0x5, 0xa, 0x5, 0xf, 0xf, 0x0, 0xb, 0x5, 0x4, 0xf, 0xb,
+                0x6, 0x0, 0xb, 0x4, 0x7, 0x8, 0xd, 0xf, 0xc, 0xc, 0x1, 0xe, 0x0, 0xb, 0xa, 0xd,
             }, .{
-                0xb, 0xf, 0xf, 0xf, 0xb, 0xf, 0xd, 0xc, 0x1, 0xa, 0x1, 0xd, 0x7, 0x4, 0x4, 0x8,
-                0x2, 0xb, 0xb, 0x4, 0xa, 0x7, 0x6, 0xd, 0xb, 0xb, 0x6, 0xb, 0x1, 0x8, 0xa, 0x6,
+                0xc, 0x5, 0xb, 0x3, 0x1, 0x5, 0xb, 0x1, 0x2, 0x1, 0x8, 0x4, 0xe, 0x1, 0xa, 0x7,
+                0x2, 0x9, 0x4, 0xd, 0xa, 0x5, 0x4, 0xe, 0x1, 0x4, 0xb, 0x2, 0x9, 0x7, 0x4, 0x2,
+                0x7, 0xd, 0x7, 0xb, 0xb, 0xf, 0xc, 0x5, 0xe, 0xf, 0x4, 0x8, 0x9, 0x5, 0x3, 0x6,
+                0x8, 0x4, 0x2, 0x5, 0x8, 0x2, 0x3, 0x5, 0x4, 0xf, 0x5, 0x9, 0x4, 0x8, 0x9, 0x8,
+            });
+            try testArgs(@Vector(128, u4), .{
+                0xe, 0x0, 0xa, 0xa, 0xf, 0x3, 0x3, 0x9, 0xe, 0x2, 0x7, 0x2, 0xf, 0x7, 0xf, 0x6,
+                0xa, 0x8, 0x0, 0x5, 0x6, 0x4, 0xf, 0x6, 0x5, 0xd, 0x0, 0xc, 0x3, 0xe, 0x3, 0x3,
+                0x5, 0x4, 0x8, 0x8, 0xb, 0x0, 0x7, 0x3, 0x8, 0xa, 0x8, 0x0, 0x8, 0x4, 0x7, 0x4,
+                0x9, 0x6, 0xa, 0x2, 0xe, 0x2, 0x0, 0x1, 0xe, 0xf, 0x9, 0x0, 0x9, 0x4, 0xb, 0xa,
+                0x1, 0x7, 0xf, 0xd, 0x6, 0x6, 0x2, 0x2, 0x1, 0xd, 0xe, 0x7, 0x5, 0xd, 0x9, 0x7,
+                0xd, 0xc, 0xc, 0x0, 0xa, 0xc, 0x1, 0xa, 0x5, 0x3, 0xf, 0xc, 0xf, 0x7, 0x1, 0xc,
+                0xa, 0x4, 0x3, 0xa, 0xc, 0x8, 0x2, 0xc, 0xf, 0x7, 0x3, 0x7, 0xf, 0x0, 0x0, 0x8,
+                0x7, 0x3, 0x4, 0x9, 0xb, 0xc, 0x5, 0x0, 0x1, 0x2, 0xa, 0x7, 0x9, 0x1, 0x3, 0x1,
+            }, .{
+                0xb, 0x1, 0x9, 0x3, 0x4, 0xb, 0xb, 0x4, 0xb, 0x7, 0x2, 0x7, 0x4, 0x5, 0x1, 0x4,
+                0x5, 0xa, 0xb, 0x4, 0x4, 0x2, 0xa, 0xb, 0xe, 0x4, 0x7, 0xb, 0xb, 0x4, 0x1, 0x6,
+                0xd, 0x3, 0xc, 0x7, 0x8, 0x1, 0x7, 0x6, 0xf, 0x9, 0x8, 0x4, 0x5, 0x2, 0x6, 0xe,
+                0xb, 0xd, 0x4, 0x6, 0x5, 0xb, 0x2, 0x8, 0x7, 0x2, 0xf, 0xe, 0x9, 0xe, 0xa, 0x5,
+                0x6, 0xc, 0xb, 0x1, 0x8, 0xc, 0xd, 0x3, 0x1, 0x4, 0x4, 0xf, 0x4, 0x3, 0x5, 0x7,
+                0xf, 0x3, 0x5, 0xf, 0xe, 0x2, 0xd, 0x7, 0x6, 0x2, 0x4, 0xd, 0xd, 0xa, 0x1, 0xa,
+                0xb, 0xa, 0xa, 0x2, 0x4, 0x9, 0x8, 0xa, 0xe, 0xb, 0xf, 0xf, 0x6, 0x4, 0x9, 0x8,
+                0x9, 0x6, 0x4, 0x5, 0xf, 0xe, 0x8, 0x5, 0x2, 0x5, 0xf, 0xb, 0xf, 0x4, 0x6, 0x4,
+            });
+
+            // workaround https://github.com/ziglang/zig/issues/22914
+            // TODO: try testArgs(@Vector(1, i5), .{
+            //     0x03,
+            // }, .{
+            //     0x0a,
+            // });
+            // try testArgs(@Vector(2, i5), .{
+            //     0x0c, -0x0e,
+            // }, .{
+            //     -0x0f, -0x0e,
+            // });
+            // try testArgs(@Vector(4, i5), .{
+            //     -0x0a, 0x06, -0x05, 0x09,
+            // }, .{
+            //     -0x0f, 0x05, 0x05, 0x09,
+            // });
+            // try testArgs(@Vector(8, i5), .{
+            //     -0x04, -0x04, 0x05, -0x05, 0x0f, -0x0e, 0x0f, -0x0e,
+            // }, .{
+            //     -0x09, -0x0d, 0x02, 0x01, 0x08, -0x05, -0x09, -0x03,
+            // });
+            // try testArgs(@Vector(16, i5), .{
+            //     -0x0e, -0x08, -0x10, -0x0b, -0x10, -0x09, -0x0f, -0x05, -0x10, 0x06, 0x0d, -0x04, 0x09, -0x0e, -0x10, -0x10,
+            // }, .{
+            //     0x03, 0x0b, 0x0c, 0x06, -0x0d, 0x0e, -0x09, -0x04, 0x0a, -0x0e, -0x0d, 0x0f, -0x09, -0x0e, -0x0b, 0x03,
+            // });
+            // try testArgs(@Vector(32, i5), .{
+            //     -0x08, -0x05, 0x09,  -0x08, 0x01, 0x0e, -0x0c, 0x0b, -0x0e, 0x0f,  -0x0b, 0x01, -0x03, 0x03, 0x08,  0x04,
+            //     0x02,  0x0f,  -0x0b, -0x0b, 0x0d, 0x00, 0x09,  0x00, -0x06, -0x08, -0x01, 0x0b, 0x05,  0x03, -0x05, -0x07,
+            // }, .{
+            //     -0x0c, 0x07,  0x0d,  -0x09, 0x0a,  0x06,  -0x0b, -0x07, -0x0a, 0x08,  0x07,  -0x0d, 0x08,  0x07,  0x09,  -0x07,
+            //     0x0b,  -0x02, -0x02, -0x02, -0x06, -0x08, 0x0a,  -0x0a, 0x02,  -0x07, -0x0a, 0x0d,  -0x07, -0x05, -0x0e, 0x05,
+            // });
+            // try testArgs(@Vector(64, i5), .{
+            //     0x04,  -0x0d, 0x0d,  -0x01, 0x07,  0x0c,  0x00, 0x01,  -0x07, 0x0a,  -0x01, -0x01, 0x08,  -0x0b, -0x03, -0x06,
+            //     -0x03, -0x03, -0x0c, 0x0e,  -0x0c, -0x02, 0x07, -0x03, 0x0e,  -0x0a, -0x0e, -0x06, -0x08, 0x0a,  -0x0c, -0x0c,
+            //     0x06,  -0x04, 0x04,  0x00,  0x05,  0x07,  0x04, 0x06,  -0x01, 0x0a,  0x07,  -0x08, 0x00,  0x0f,  0x0f,  0x0d,
+            //     -0x07, 0x0f,  0x05,  -0x0b, -0x08, -0x0c, 0x0d, -0x05, -0x05, 0x0e,  0x02,  0x06,  0x0d,  0x06,  0x00,  0x0a,
+            // }, .{
+            //     0x02,  -0x09, -0x01, -0x10, -0x0c, -0x0f, -0x10, -0x0d, 0x02,  0x0e, 0x07,  -0x01, -0x0a, -0x0b, 0x05,  -0x0e,
+            //     -0x09, 0x03,  0x08,  -0x0d, 0x0d,  0x03,  -0x02, 0x0e,  0x0c,  0x03, 0x0b,  -0x0d, -0x04, -0x10, 0x0e,  0x0d,
+            //     0x09,  -0x03, -0x0e, -0x03, -0x05, -0x0c, -0x07, 0x08,  -0x06, 0x08, -0x0e, 0x02,  -0x10, 0x01,  0x01,  -0x0a,
+            //     0x01,  -0x09, 0x03,  -0x01, 0x05,  0x09,  0x06,  -0x03, -0x0a, 0x08, -0x0e, 0x0e,  0x07,  -0x05, -0x0c, -0x10,
+            // });
+            // try testArgs(@Vector(128, i5), .{
+            //     0x01,  0x0b,  -0x01, -0x10, -0x05, 0x05,  -0x09, 0x0e,  -0x0e, 0x04,  0x0f,  -0x06, 0x0f,  0x04,  -0x02, 0x0a,
+            //     -0x08, -0x06, 0x08,  -0x07, -0x08, 0x0e,  0x06,  0x0d,  -0x07, -0x04, 0x04,  -0x0b, 0x02,  -0x06, 0x07,  -0x10,
+            //     0x0d,  0x09,  0x0b,  -0x04, 0x0e,  -0x06, -0x0a, 0x01,  0x06,  0x08,  0x01,  -0x0b, -0x09, -0x08, -0x0c, -0x0b,
+            //     0x07,  0x06,  0x0d,  0x0c,  -0x0b, -0x03, -0x06, -0x0c, -0x0e, 0x05,  0x0b,  0x08,  -0x01, 0x00,  0x01,  0x0a,
+            //     0x00,  0x0a,  0x06,  0x06,  -0x10, -0x05, -0x05, -0x0f, 0x02,  -0x06, -0x08, -0x08, 0x0f,  0x09,  -0x07, -0x05,
+            //     0x07,  0x06,  0x03,  0x05,  0x02,  0x0f,  0x0d,  -0x0e, -0x03, -0x01, -0x06, -0x02, -0x01, -0x07, 0x09,  0x05,
+            //     -0x07, -0x07, -0x08, 0x0c,  -0x0e, 0x09,  -0x0c, -0x0d, 0x07,  0x04,  0x07,  -0x03, 0x09,  0x0e,  0x04,  0x02,
+            //     0x0f,  -0x02, -0x10, -0x03, -0x0d, -0x04, 0x0c,  -0x06, -0x01, -0x0e, -0x0e, -0x0a, 0x0d,  -0x0e, 0x04,  0x03,
+            // }, .{
+            //     -0x08, -0x09, -0x04, 0x0f,  -0x0f, -0x08, -0x04, 0x0b,  0x09,  -0x0b, -0x02, 0x0f,  0x01,  -0x01, -0x0a, -0x0a,
+            //     0x08,  0x09,  0x0d,  -0x06, 0x0f,  -0x02, 0x0c,  0x01,  0x0c,  0x02,  -0x04, 0x0b,  0x05,  0x02,  -0x08, -0x09,
+            //     0x01,  0x0f,  -0x0b, 0x02,  -0x06, 0x08,  -0x0e, -0x02, -0x0b, -0x03, -0x01, 0x0c,  0x09,  -0x04, 0x08,  -0x0a,
+            //     0x09,  -0x05, 0x08,  0x0e,  0x05,  0x03,  -0x0a, 0x0d,  -0x03, 0x06,  0x0f,  -0x09, 0x0a,  0x03,  0x02,  0x0c,
+            //     0x08,  -0x0a, 0x06,  0x0e,  0x08,  0x02,  0x08,  -0x04, -0x0d, -0x02, -0x08, -0x0a, 0x0a,  0x0c,  -0x03, 0x04,
+            //     0x0b,  -0x0c, -0x0e, 0x01,  0x07,  -0x01, 0x09,  0x0f,  -0x06, -0x05, -0x0e, -0x01, -0x04, 0x0a,  -0x0a, -0x0d,
+            //     -0x10, -0x10, -0x03, -0x0f, -0x0c, -0x0a, -0x0b, -0x06, -0x04, -0x0f, -0x0b, -0x08, 0x0e,  0x04,  -0x01, -0x0b,
+            //     -0x06, 0x0a,  0x0a,  -0x0c, -0x0c, 0x0b,  -0x02, 0x0c,  -0x04, -0x06, -0x0c, -0x09, -0x09, -0x0b, -0x0c, -0x0b,
+            // });
+            try testArgs(@Vector(1, i5), .{
+                -0x0f,
+            }, .{
+                -0x03,
+            });
+            try testArgs(@Vector(2, i5), .{
+                0x0b, -0x04,
+            }, .{
+                -0x05, -0x08,
+            });
+            try testArgs(@Vector(4, i5), .{
+                0x08, 0x0f, -0x06, -0x0d,
+            }, .{
+                -0x03, -0x0a, -0x05, -0x03,
+            });
+            try testArgs(@Vector(8, i5), .{
+                0x08, 0x0c, 0x07, 0x00, -0x06, 0x08, 0x08, 0x0d,
+            }, .{
+                0x07, 0x09, 0x05, -0x08, 0x08, -0x0e, -0x0e, -0x0e,
+            });
+            try testArgs(@Vector(16, i5), .{
+                0x06, 0x0b, 0x07, 0x0e, 0x0e, 0x02, -0x08, -0x0b, -0x03, 0x09, -0x08, 0x0e, -0x03, -0x0a, 0x01, 0x0b,
+            }, .{
+                -0x05, 0x0f, -0x08, -0x10, -0x07, 0x06, 0x08, 0x01, 0x0a, 0x06, 0x05, -0x0f, 0x03, 0x05, 0x0b, -0x06,
+            });
+            try testArgs(@Vector(32, i5), .{
+                0x0e,  0x07,  -0x08, 0x0b,  0x04, -0x05, 0x0f,  0x02,  0x04, -0x0c, 0x0d, 0x09, 0x0e, -0x02, -0x0f, 0x04,
+                -0x03, -0x02, 0x09,  -0x0d, 0x01, 0x00,  -0x0e, -0x0a, 0x02, -0x06, 0x0c, 0x03, 0x06, 0x0e,  0x06,  -0x07,
+            }, .{
+                -0x0a, 0x01,  0x0a,  -0x10, 0x06, 0x09,  0x0e,  0x03,  0x0e, -0x09, 0x08,  -0x08, 0x06, 0x0c, -0x0a, -0x07,
+                0x04,  -0x02, -0x09, 0x0b,  0x0e, -0x03, -0x01, -0x0d, 0x0e, -0x0a, -0x02, 0x08,  0x0e, 0x02, -0x04, -0x05,
+            });
+            try testArgs(@Vector(64, i5), .{
+                0x04,  0x02,  -0x04, -0x0a, -0x0b, -0x0b, -0x0c, -0x08, -0x0b, -0x05, 0x0c,  0x0b,  0x05, 0x02,  -0x01, -0x06,
+                -0x09, -0x06, 0x0a,  0x0f,  -0x06, -0x0d, 0x0c,  0x05,  0x0b,  -0x06, -0x02, 0x09,  0x0e, -0x09, 0x05,  0x0a,
+                -0x0c, 0x0b,  -0x01, -0x04, 0x01,  0x0c,  0x0f,  -0x0f, -0x07, 0x0a,  0x04,  0x02,  0x08, -0x01, -0x03, 0x0a,
+                -0x0e, -0x04, -0x03, -0x0b, -0x01, 0x02,  -0x05, 0x08,  -0x03, 0x02,  -0x01, -0x05, 0x07, -0x09, 0x0f,  0x00,
+            }, .{
+                -0x09, -0x02, -0x0a, -0x05, -0x05, -0x0d, 0x04,  -0x05, 0x0e,  -0x01, -0x07, 0x04,  0x0d, -0x0f, -0x09, -0x04,
+                -0x0b, -0x0b, 0x0e,  0x01,  0x06,  -0x0f, -0x0b, 0x03,  -0x04, 0x05,  -0x03, -0x09, 0x0a, 0x04,  0x04,  -0x06,
+                -0x02, 0x01,  0x0e,  0x0c,  -0x10, 0x0f,  -0x09, -0x08, -0x10, -0x02, -0x07, -0x0d, 0x08, 0x07,  -0x03, 0x01,
+                -0x0d, -0x10, -0x07, 0x08,  0x0d,  0x07,  0x09,  0x0f,  0x04,  0x06,  -0x05, -0x09, 0x04, -0x04, -0x04, 0x0d,
+            });
+            try testArgs(@Vector(128, i5), .{
+                -0x02, -0x07, -0x03, 0x08,  -0x0b, -0x09, -0x0c, 0x0d,  -0x0e, 0x08,  -0x03, -0x09, 0x0a,  0x02,  -0x0d, 0x05,
+                -0x04, -0x0c, -0x07, 0x08,  -0x0a, 0x04,  0x08,  -0x07, 0x0c,  0x03,  0x0e,  0x0f,  0x08,  -0x08, -0x02, -0x0a,
+                -0x08, 0x04,  -0x0c, -0x09, 0x0a,  -0x0e, -0x04, -0x05, 0x02,  0x0d,  -0x04, -0x0f, -0x0d, -0x02, 0x05,  -0x07,
+                0x06,  0x0b,  -0x0b, -0x04, -0x04, -0x0e, -0x0c, 0x09,  0x02,  -0x0c, 0x0e,  0x09,  0x02,  -0x04, 0x05,  0x04,
+                -0x0e, -0x02, -0x08, 0x05,  0x00,  0x0f,  -0x0a, 0x0a,  0x0b,  0x0b,  0x02,  -0x02, -0x0d, -0x08, -0x0d, 0x03,
+                0x0b,  -0x0b, 0x02,  0x05,  -0x06, -0x07, -0x08, -0x0a, -0x09, -0x03, -0x0e, -0x0f, -0x0b, 0x03,  0x00,  0x09,
+                0x07,  0x0a,  -0x0b, 0x06,  0x01,  -0x03, -0x0a, -0x09, -0x07, -0x0d, 0x0a,  -0x0c, 0x01,  -0x09, -0x01, -0x01,
+                -0x09, 0x00,  0x0f,  0x05,  -0x0b, 0x0c,  0x04,  0x0c,  0x0b,  -0x08, 0x01,  -0x05, -0x04, 0x02,  0x0c,  0x04,
+            }, .{
+                -0x08, -0x05, -0x0a, 0x06,  -0x06, -0x04, 0x01,  0x07,  0x0d,  -0x0a, -0x02, 0x0b,  -0x01, -0x0c, -0x04, 0x0c,
+                -0x06, 0x0d,  0x04,  0x0b,  0x07,  0x0c,  0x02,  0x02,  -0x02, 0x09,  0x0d,  -0x07, -0x08, 0x07,  -0x04, 0x06,
+                -0x0a, 0x07,  0x01,  0x07,  -0x0d, 0x02,  0x09,  -0x02, 0x0e,  -0x08, -0x03, -0x07, 0x07,  0x0a,  -0x0e, -0x06,
+                0x0b,  -0x0d, 0x0c,  -0x06, -0x07, 0x0a,  0x0b,  -0x0a, 0x05,  0x01,  -0x0d, -0x08, -0x0e, -0x08, -0x07, -0x01,
+                -0x10, 0x07,  0x03,  0x06,  0x03,  0x0a,  0x08,  -0x0a, 0x04,  -0x09, 0x0c,  0x01,  -0x10, 0x02,  0x09,  0x0d,
+                0x0f,  -0x01, -0x06, -0x0a, 0x04,  0x01,  0x07,  0x07,  0x0e,  0x0e,  -0x04, -0x0a, -0x03, 0x07,  0x07,  -0x10,
+                -0x04, 0x0e,  -0x09, -0x0f, -0x0b, -0x0e, 0x0c,  -0x0d, 0x0b,  -0x06, 0x0f,  -0x0d, 0x0b,  0x04,  -0x10, -0x10,
+                -0x0e, 0x09,  0x0d,  -0x02, -0x0e, -0x06, -0x0b, -0x0f, -0x08, -0x0f, -0x0d, -0x0a, -0x02, 0x03,  0x0b,  -0x0a,
+            });
+
+            try testArgs(@Vector(1, u5), .{
+                0x0a,
+            }, .{
+                0x1c,
+            });
+            try testArgs(@Vector(2, u5), .{
+                0x01, 0x07,
+            }, .{
+                0x03, 0x1d,
+            });
+            try testArgs(@Vector(4, u5), .{
+                0x1c, 0x0d, 0x1f, 0x0a,
+            }, .{
+                0x14, 0x03, 0x07, 0x02,
+            });
+            try testArgs(@Vector(8, u5), .{
+                0x00, 0x0c, 0x1f, 0x01, 0x12, 0x14, 0x12, 0x10,
+            }, .{
+                0x0c, 0x14, 0x05, 0x1a, 0x04, 0x17, 0x06, 0x1a,
+            });
+            try testArgs(@Vector(16, u5), .{
+                0x1e, 0x17, 0x03, 0x16, 0x1f, 0x10, 0x00, 0x05, 0x19, 0x10, 0x18, 0x0d, 0x0f, 0x1b, 0x1e, 0x05,
+            }, .{
+                0x18, 0x13, 0x14, 0x12, 0x10, 0x11, 0x18, 0x0c, 0x03, 0x02, 0x11, 0x03, 0x17, 0x0c, 0x19, 0x05,
+            });
+            try testArgs(@Vector(32, u5), .{
+                0x1a, 0x10, 0x16, 0x1f, 0x08, 0x07, 0x1a, 0x04, 0x05, 0x16, 0x07, 0x09, 0x09, 0x03, 0x0f, 0x00,
+                0x03, 0x05, 0x1c, 0x00, 0x07, 0x16, 0x17, 0x1b, 0x0b, 0x01, 0x0e, 0x08, 0x15, 0x12, 0x04, 0x16,
+            }, .{
+                0x02, 0x14, 0x09, 0x17, 0x0c, 0x10, 0x1a, 0x1e, 0x14, 0x06, 0x03, 0x1b, 0x1d, 0x1f, 0x18, 0x0b,
+                0x16, 0x1b, 0x11, 0x14, 0x0d, 0x18, 0x05, 0x18, 0x16, 0x1a, 0x14, 0x04, 0x14, 0x1a, 0x0c, 0x1e,
+            });
+            try testArgs(@Vector(64, u5), .{
+                0x0c, 0x09, 0x0c, 0x05, 0x0e, 0x08, 0x0b, 0x07, 0x18, 0x05, 0x0a, 0x1e, 0x06, 0x14, 0x0d, 0x03,
+                0x0c, 0x0d, 0x00, 0x10, 0x0f, 0x05, 0x12, 0x0e, 0x0c, 0x1c, 0x16, 0x11, 0x14, 0x0b, 0x16, 0x06,
+                0x1e, 0x07, 0x00, 0x13, 0x09, 0x13, 0x1b, 0x03, 0x12, 0x1c, 0x0b, 0x04, 0x18, 0x0a, 0x18, 0x16,
+                0x16, 0x0a, 0x11, 0x19, 0x00, 0x1d, 0x08, 0x06, 0x0a, 0x1e, 0x09, 0x1d, 0x18, 0x1e, 0x06, 0x1b,
+            }, .{
+                0x0d, 0x10, 0x05, 0x18, 0x10, 0x0a, 0x06, 0x03, 0x1c, 0x10, 0x1c, 0x1e, 0x19, 0x1c, 0x04, 0x03,
+                0x0a, 0x1d, 0x1f, 0x10, 0x0e, 0x04, 0x0c, 0x18, 0x14, 0x05, 0x11, 0x19, 0x19, 0x14, 0x10, 0x06,
+                0x0b, 0x16, 0x0f, 0x01, 0x12, 0x0c, 0x1b, 0x03, 0x19, 0x0d, 0x12, 0x15, 0x15, 0x11, 0x16, 0x1f,
+                0x03, 0x11, 0x06, 0x11, 0x1d, 0x16, 0x0c, 0x17, 0x19, 0x1a, 0x06, 0x16, 0x13, 0x0a, 0x09, 0x1c,
+            });
+            try testArgs(@Vector(128, u5), .{
+                0x09, 0x16, 0x11, 0x1e, 0x17, 0x13, 0x1d, 0x14, 0x07, 0x15, 0x1c, 0x0b, 0x08, 0x19, 0x01, 0x0b,
+                0x1b, 0x19, 0x00, 0x0c, 0x0c, 0x0a, 0x10, 0x08, 0x1a, 0x1c, 0x03, 0x14, 0x1c, 0x0b, 0x18, 0x1f,
+                0x0c, 0x10, 0x17, 0x16, 0x05, 0x16, 0x0e, 0x03, 0x02, 0x0b, 0x13, 0x0b, 0x0a, 0x09, 0x0f, 0x01,
+                0x17, 0x0f, 0x1c, 0x13, 0x1d, 0x0b, 0x03, 0x0e, 0x01, 0x04, 0x1f, 0x18, 0x04, 0x07, 0x10, 0x05,
+                0x18, 0x09, 0x08, 0x16, 0x1e, 0x0e, 0x0f, 0x0c, 0x11, 0x0b, 0x0c, 0x13, 0x0f, 0x13, 0x13, 0x1d,
+                0x02, 0x01, 0x0f, 0x03, 0x17, 0x0f, 0x0f, 0x09, 0x0a, 0x1e, 0x1c, 0x0e, 0x0d, 0x17, 0x1e, 0x02,
+                0x07, 0x05, 0x0f, 0x0b, 0x01, 0x1f, 0x13, 0x17, 0x1a, 0x01, 0x1d, 0x12, 0x07, 0x01, 0x11, 0x10,
+                0x1a, 0x19, 0x02, 0x06, 0x1a, 0x1e, 0x13, 0x18, 0x11, 0x14, 0x19, 0x15, 0x1a, 0x1f, 0x01, 0x01,
+            }, .{
+                0x10, 0x04, 0x14, 0x13, 0x04, 0x0f, 0x1d, 0x17, 0x0a, 0x08, 0x17, 0x1a, 0x02, 0x0c, 0x0c, 0x12,
+                0x07, 0x0d, 0x11, 0x1c, 0x01, 0x17, 0x13, 0x1b, 0x11, 0x1c, 0x02, 0x16, 0x10, 0x0c, 0x16, 0x07,
+                0x02, 0x18, 0x03, 0x04, 0x1c, 0x1c, 0x18, 0x0c, 0x15, 0x14, 0x17, 0x1c, 0x1e, 0x0b, 0x1c, 0x1f,
+                0x18, 0x13, 0x02, 0x06, 0x18, 0x03, 0x1e, 0x12, 0x0a, 0x03, 0x07, 0x11, 0x09, 0x1a, 0x1c, 0x07,
+                0x10, 0x1b, 0x19, 0x0b, 0x1c, 0x10, 0x08, 0x05, 0x02, 0x09, 0x04, 0x02, 0x11, 0x05, 0x13, 0x1a,
+                0x0b, 0x1b, 0x1e, 0x11, 0x1e, 0x0e, 0x05, 0x18, 0x08, 0x0c, 0x04, 0x04, 0x06, 0x15, 0x05, 0x04,
+                0x14, 0x1f, 0x18, 0x0c, 0x04, 0x04, 0x03, 0x1c, 0x1e, 0x09, 0x1d, 0x08, 0x13, 0x16, 0x0d, 0x1c,
+                0x15, 0x16, 0x04, 0x02, 0x0c, 0x04, 0x13, 0x1d, 0x1d, 0x07, 0x1a, 0x1c, 0x15, 0x15, 0x19, 0x09,
+            });
+
+            try testArgs(@Vector(1, i7), .{
+                0x3b,
+            }, .{
+                0x0d,
+            });
+            try testArgs(@Vector(2, i7), .{
+                -0x3e, -0x06,
+            }, .{
+                -0x37, -0x0f,
+            });
+            try testArgs(@Vector(4, i7), .{
+                0x35, 0x29, -0x17, 0x39,
+            }, .{
+                -0x2c, -0x02, -0x29, -0x1a,
+            });
+            try testArgs(@Vector(8, i7), .{
+                0x1d, 0x25, 0x03, 0x1c, -0x12, -0x09, 0x1d, 0x3c,
+            }, .{
+                0x34, 0x33, 0x3e, -0x21, 0x13, 0x2d, 0x1f, 0x05,
+            });
+            try testArgs(@Vector(16, i7), .{
+                -0x12, 0x39, 0x11, 0x28, 0x29, -0x30, 0x08, -0x33, 0x2e, 0x03, 0x31, -0x2b, -0x1f, 0x37, 0x0f, -0x31,
+            }, .{
+                -0x0d, -0x1a, -0x22, -0x38, 0x30, 0x32, -0x38, -0x3b, -0x04, -0x2c, 0x27, -0x0d, -0x02, -0x2d, 0x18, 0x09,
+            });
+            try testArgs(@Vector(32, i7), .{
+                0x04, 0x09,  0x1e, -0x1b, -0x33, 0x1f, -0x08, 0x2d, -0x30, -0x3c, 0x11,  -0x2a, 0x29,  -0x16, -0x3b, -0x1e,
+                0x3a, -0x0d, 0x11, 0x16,  0x27,  0x0f, 0x2f,  0x14, 0x3b,  -0x2f, -0x3d, 0x1d,  -0x08, -0x2e, -0x2a, -0x33,
+            }, .{
+                -0x09, 0x3c,  0x15, -0x25, -0x03, 0x0a,  0x11, 0x03,  0x12,  -0x1d, -0x23, 0x29,  -0x30, -0x35, -0x0e, -0x15,
+                -0x1c, -0x14, 0x07, 0x29,  -0x0d, -0x2e, 0x12, -0x3b, -0x0c, 0x22,  -0x15, -0x2d, 0x04,  0x19,  0x14,  -0x1d,
+            });
+            try testArgs(@Vector(64, i7), .{
+                0x23,  0x09,  -0x13, 0x03, 0x19,  -0x35, -0x06, -0x3c, -0x31, 0x12,  0x09, 0x22,  -0x15, -0x1c, -0x25, -0x31,
+                0x3a,  -0x31, 0x05,  0x0b, -0x14, 0x38,  0x39,  0x15,  0x15,  -0x11, 0x11, -0x21, 0x2f,  0x3d,  -0x2e, -0x33,
+                -0x38, 0x1b,  0x14,  0x1d, 0x0d,  -0x16, 0x10,  -0x0a, 0x25,  0x0d,  0x1e, 0x1a,  -0x15, 0x21,  0x3e,  -0x0c,
+                0x03,  0x3f,  0x1f,  0x17, 0x2c,  -0x0a, -0x2b, 0x1f,  0x32,  0x09,  0x0f, -0x15, 0x24,  -0x35, 0x27,  -0x13,
+            }, .{
+                -0x20, 0x25,  0x38,  -0x28, 0x06,  -0x2f, -0x0b, -0x2b, 0x21,  -0x23, -0x08, 0x06,  -0x2a, -0x26, 0x33,  -0x2f,
+                0x06,  -0x1b, 0x3c,  -0x3d, 0x3e,  0x1f,  0x27,  0x28,  -0x05, -0x31, -0x29, -0x25, -0x13, -0x07, -0x3e, 0x3e,
+                0x0e,  -0x2c, -0x0f, -0x3b, 0x1b,  0x17,  0x3f,  -0x3a, 0x21,  -0x16, 0x11,  -0x0a, 0x1b,  0x2a,  0x27,  0x3e,
+                -0x09, -0x34, 0x20,  0x3e,  -0x29, -0x35, -0x0b, -0x20, -0x19, 0x21,  0x13,  0x22,  0x0c,  -0x1f, 0x38,  0x21,
+            });
+            try testArgs(@Vector(128, i7), .{
+                -0x38, -0x10, 0x08,  -0x2b, 0x3e,  -0x19, 0x39,  -0x1f, -0x27, -0x10, -0x0c, 0x02,  0x07,  0x10, -0x1a, 0x0b,
+                0x3c,  -0x2e, 0x1b,  -0x17, 0x3e,  -0x1b, -0x04, 0x35,  0x2d,  0x0d,  0x33,  0x0f,  0x18,  0x0b, 0x1a,  -0x09,
+                0x25,  -0x0b, -0x21, 0x3a,  -0x28, 0x12,  0x16,  0x03,  -0x1f, 0x1d,  -0x1c, -0x1a, -0x38, 0x32, 0x3d,  -0x2a,
+                -0x0b, -0x1b, -0x39, -0x0d, -0x20, -0x1e, -0x39, 0x02,  -0x13, -0x23, 0x2f,  0x0a,  -0x22, 0x15, 0x34,  -0x06,
+                -0x2e, -0x3b, 0x26,  0x38,  0x33,  -0x29, -0x0c, 0x2e,  0x07,  -0x19, 0x3c,  -0x35, -0x33, 0x39, 0x3c,  0x37,
+                -0x07, -0x13, 0x16,  0x05,  -0x27, 0x28,  -0x2b, -0x07, -0x25, -0x01, 0x1d,  -0x0a, -0x01, 0x1d, -0x2a, -0x30,
+                0x31,  0x0a,  -0x24, 0x0c,  0x35,  -0x1c, -0x04, 0x21,  -0x35, 0x12,  0x19,  0x3d,  -0x0c, 0x20, 0x28,  -0x22,
+                -0x3c, 0x09,  0x11,  -0x0c, -0x14, -0x39, 0x0f,  -0x40, -0x0a, 0x0f,  0x1b,  0x34,  -0x27, 0x35, 0x0e,  -0x3d,
+            }, .{
+                -0x1a, 0x06,  0x1f,  -0x24, -0x21, -0x3d, 0x1f,  -0x18, 0x2f,  -0x38, 0x2f,  -0x0f, 0x20,  0x2d,  0x31,  -0x09,
+                -0x1d, 0x3a,  0x24,  -0x09, 0x0c,  -0x3b, -0x35, -0x2a, -0x08, -0x2d, 0x29,  0x23,  -0x37, -0x05, 0x27,  -0x12,
+                0x23,  -0x14, 0x26,  0x36,  -0x33, -0x1d, -0x0f, 0x32,  0x2a,  -0x34, -0x31, 0x0a,  -0x33, -0x34, 0x30,  0x3d,
+                -0x1a, 0x0f,  0x16,  0x07,  0x2d,  -0x2b, 0x2c,  -0x2d, 0x34,  -0x07, 0x32,  0x3a,  0x2e,  -0x1c, 0x2e,  0x0f,
+                0x02,  0x33,  -0x1e, -0x05, -0x40, -0x0a, 0x3d,  -0x28, -0x21, -0x2b, -0x18, 0x02,  0x01,  -0x2c, -0x16, -0x3b,
+                0x27,  0x23,  -0x19, 0x13,  -0x2c, 0x3c,  -0x1a, 0x0e,  -0x25, -0x19, 0x06,  0x1b,  -0x3f, 0x26,  0x30,  0x0d,
+                -0x0e, -0x2f, -0x28, 0x3b,  -0x0e, 0x2d,  0x3d,  0x03,  0x0d,  -0x23, 0x02,  0x26,  0x0c,  0x31,  -0x10, -0x10,
+                0x37,  -0x38, -0x0f, 0x20,  -0x17, 0x02,  0x3e,  -0x40, 0x37,  0x22,  0x06,  0x14,  -0x31, -0x1e, -0x36, -0x2c,
+            });
+
+            try testArgs(@Vector(1, u7), .{
+                0x72,
+            }, .{
+                0x0c,
+            });
+            try testArgs(@Vector(2, u7), .{
+                0x0c, 0x3a,
+            }, .{
+                0x72, 0x0b,
+            });
+            try testArgs(@Vector(4, u7), .{
+                0x68, 0x2b, 0x52, 0x6e,
+            }, .{
+                0x74, 0x79, 0x10, 0x67,
+            });
+            try testArgs(@Vector(8, u7), .{
+                0x68, 0x30, 0x65, 0x49, 0x3f, 0x3c, 0x05, 0x1b,
+            }, .{
+                0x3f, 0x0e, 0x04, 0x50, 0x20, 0x07, 0x07, 0x4c,
+            });
+            try testArgs(@Vector(16, u7), .{
+                0x46, 0x73, 0x34, 0x68, 0x66, 0x0d, 0x69, 0x38, 0x7d, 0x40, 0x34, 0x22, 0x7b, 0x57, 0x76, 0x69,
+            }, .{
+                0x12, 0x45, 0x1b, 0x5d, 0x24, 0x36, 0x72, 0x70, 0x51, 0x1c, 0x23, 0x77, 0x7d, 0x7a, 0x20, 0x4b,
+            });
+            try testArgs(@Vector(32, u7), .{
+                0x1e, 0x74, 0x19, 0x6c, 0x74, 0x05, 0x6f, 0x08, 0x54, 0x56, 0x25, 0x40, 0x07, 0x2d, 0x42, 0x73,
+                0x5c, 0x1c, 0x60, 0x1e, 0x5a, 0x1d, 0x00, 0x33, 0x3b, 0x09, 0x28, 0x58, 0x66, 0x1d, 0x4e, 0x6b,
+            }, .{
+                0x22, 0x79, 0x72, 0x19, 0x19, 0x0b, 0x64, 0x6b, 0x15, 0x70, 0x10, 0x70, 0x73, 0x56, 0x48, 0x68,
+                0x01, 0x16, 0x1b, 0x68, 0x67, 0x09, 0x37, 0x36, 0x29, 0x25, 0x01, 0x7c, 0x58, 0x37, 0x61, 0x1c,
+            });
+            try testArgs(@Vector(64, u7), .{
+                0x79, 0x75, 0x24, 0x4a, 0x05, 0x1b, 0x28, 0x74, 0x43, 0x1c, 0x35, 0x06, 0x0d, 0x53, 0x07, 0x75,
+                0x37, 0x3b, 0x6c, 0x50, 0x5d, 0x52, 0x3f, 0x76, 0x3e, 0x57, 0x1e, 0x12, 0x31, 0x7b, 0x62, 0x6e,
+                0x30, 0x09, 0x1c, 0x0d, 0x3e, 0x52, 0x64, 0x6e, 0x23, 0x41, 0x2f, 0x4b, 0x69, 0x65, 0x67, 0x3f,
+                0x56, 0x6d, 0x4d, 0x35, 0x54, 0x7c, 0x63, 0x5d, 0x24, 0x49, 0x0a, 0x71, 0x55, 0x48, 0x3e, 0x4b,
+            }, .{
+                0x6a, 0x1d, 0x23, 0x7d, 0x12, 0x29, 0x23, 0x0b, 0x53, 0x3d, 0x39, 0x4b, 0x45, 0x05, 0x1b, 0x4a,
+                0x5c, 0x66, 0x38, 0x2d, 0x38, 0x70, 0x29, 0x5b, 0x32, 0x38, 0x39, 0x2e, 0x01, 0x3c, 0x15, 0x05,
+                0x1f, 0x28, 0x3a, 0x0f, 0x0a, 0x09, 0x11, 0x5e, 0x0a, 0x7a, 0x3f, 0x7d, 0x2c, 0x34, 0x63, 0x34,
+                0x1b, 0x61, 0x73, 0x63, 0x2c, 0x35, 0x25, 0x19, 0x09, 0x0c, 0x75, 0x5d, 0x01, 0x29, 0x3b, 0x0c,
+            });
+            try testArgs(@Vector(128, u7), .{
+                0x5c,
+                0x65,
+                0x65,
+                0x34,
+                0x31,
+                0x03,
+                0x7a,
+                0x56,
+                0x16,
+                0x74,
+                0x5c,
+                0x7f,
+                0x2a,
+                0x46,
+                0x2a,
+                0x5f,
+                0x62,
+                0x06,
+                0x51,
+                0x23,
+                0x58,
+                0x1f,
+                0x5a,
+                0x2d,
+                0x29,
+                0x21,
+                0x26,
+                0x5a,
+                0x5a,
+                0x13,
+                0x13,
+                0x46,
+                0x26,
+                0x1c,
+                0x06,
+                0x2d,
+                0x08,
+                0x52,
+                0x5b,
+                0x6f,
+                0x2d,
+                0x4a,
+                0x00,
+                0x40,
+                0x68,
+                0x27,
+                0x00,
+                0x4a,
+                0x3a,
+                0x22,
+                0x2d,
+                0x5b,
+                0x05,
+                0x26,
+                0x4e,
+                0x6f,
+                0x46,
+                0x4d,
+                0x14,
+                0x70,
+                0x51,
+                0x04,
+                0x66,
+                0x13,
+                0x4c,
+                0x7c,
+                0x67,
+                0x23,
+                0x13,
+                0x55,
+                0x1b,
+                0x30,
+                0x7d,
+                0x04,
+                0x47,
+                0x78,
+                0x05,
+                0x09,
+                0x5a,
+                0x20,
+                0x2e,
+                0x17,
+                0x11,
+                0x49,
+                0x6c,
+                0x5e,
+                0x34,
+                0x3e,
+                0x66,
+                0x60,
+                0x5d,
+                0x75,
+                0x48,
+                0x1d,
+                0x69,
+                0x67,
+                0x40,
+                0x2d,
+                0x7b,
+                0x31,
+                0x13,
+                0x60,
+                0x19,
+                0x2f,
+                0x3e,
+                0x7d,
+                0x23,
+                0x6a,
+                0x0e,
+                0x16,
+                0x44,
+                0x34,
+                0x5d,
+                0x5a,
+                0x2a,
+                0x0b,
+                0x64,
+                0x07,
+                0x22,
+                0x5b,
+                0x24,
+                0x22,
+                0x3b,
+                0x46,
+                0x23,
+                0x65,
+                0x5d,
+                0x34,
+            }, .{
+                0x4b,
+                0x36,
+                0x7a,
+                0x13,
+                0x5a,
+                0x4b,
+                0x69,
+                0x4b,
+                0x1d,
+                0x02,
+                0x1b,
+                0x3f,
+                0x61,
+                0x21,
+                0x45,
+                0x48,
+                0x44,
+                0x61,
+                0x25,
+                0x42,
+                0x57,
+                0x7d,
+                0x7a,
+                0x45,
+                0x22,
+                0x2e,
+                0x44,
+                0x3f,
+                0x3a,
+                0x14,
+                0x07,
+                0x6e,
+                0x68,
+                0x51,
+                0x03,
+                0x6b,
+                0x11,
+                0x32,
+                0x6d,
+                0x6f,
+                0x44,
+                0x5a,
+                0x61,
+                0x6d,
+                0x71,
+                0x66,
+                0x54,
+                0x14,
+                0x5d,
+                0x56,
+                0x22,
+                0x5c,
+                0x3a,
+                0x72,
+                0x16,
+                0x39,
+                0x59,
+                0x3e,
+                0x27,
+                0x4d,
+                0x3d,
+                0x44,
+                0x72,
+                0x2c,
+                0x71,
+                0x74,
+                0x3b,
+                0x6c,
+                0x70,
+                0x39,
+                0x0f,
+                0x5c,
+                0x71,
+                0x04,
+                0x67,
+                0x02,
+                0x2c,
+                0x18,
+                0x0f,
+                0x14,
+                0x2d,
+                0x24,
+                0x51,
+                0x34,
+                0x6d,
+                0x0c,
+                0x19,
+                0x0f,
+                0x73,
+                0x79,
+                0x3d,
+                0x74,
+                0x20,
+                0x15,
+                0x22,
+                0x25,
+                0x09,
+                0x14,
+                0x09,
+                0x71,
+                0x2d,
+                0x6f,
+                0x09,
+                0x2e,
+                0x27,
+                0x75,
+                0x57,
+                0x62,
+                0x4d,
+                0x07,
+                0x62,
+                0x01,
+                0x41,
+                0x2d,
+                0x5d,
+                0x4c,
+                0x77,
+                0x10,
+                0x7f,
+                0x30,
+                0x0f,
+                0x50,
+                0x15,
+                0x39,
+                0x34,
+                0x7c,
+                0x33,
+                0x16,
             });
 
             try testArgs(@Vector(1, i8), .{
@@ -18198,6 +19374,248 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
                 0x56, 0x4f, 0xf1, 0xaa, 0x0a, 0x0f, 0xdb, 0x1b, 0xc8, 0x45, 0x9b, 0x12, 0xb4, 0x1a, 0xe4, 0xa3,
             });
 
+            try testArgs(@Vector(1, i9), .{
+                0x002,
+            }, .{
+                0x0bd,
+            });
+            try testArgs(@Vector(2, i9), .{
+                0x00c, 0x0b1,
+            }, .{
+                -0x00b, -0x009,
+            });
+            try testArgs(@Vector(4, i9), .{
+                0x0b2, 0x02b, -0x09d, -0x03c,
+            }, .{
+                0x031, 0x078, 0x016, -0x08a,
+            });
+            try testArgs(@Vector(8, i9), .{
+                0x066, -0x03b, 0x007, 0x054, 0x0a7, 0x0ee, 0x00f, -0x0f8,
+            }, .{
+                0x01e, 0x0af, 0x047, 0x0d8, 0x002, -0x030, -0x01d, 0x003,
+            });
+            try testArgs(@Vector(16, i9), .{
+                0x0e7, -0x066, 0x079, -0x08d, -0x01a, -0x009, 0x0c8, 0x0c0, -0x070, 0x001, -0x00e, 0x014, -0x0f7, -0x07f, 0x0c8, -0x09a,
+            }, .{
+                0x0ea, -0x040, -0x045, -0x06d, 0x02c, -0x0b0, -0x0ba, -0x01a, 0x0af, 0x055, -0x015, -0x0fa, 0x0ca, 0x0f4, 0x007, -0x0a0,
+            });
+            try testArgs(@Vector(32, i9), .{
+                -0x003, 0x01b,  0x0b6, -0x009, 0x090, 0x047,  -0x00b, -0x0f2, 0x0f6, -0x09d, 0x0bf,  0x06a, -0x0e0, 0x03f, 0x007,  0x0a1,
+                0x009,  -0x0fb, 0x034, 0x0ba,  0x0cb, -0x0c9, -0x0ff, -0x0c1, 0x0d3, 0x029,  -0x076, 0x044, 0x0d4,  0x083, -0x002, 0x04e,
+            }, .{
+                -0x0cb, 0x0e3,  0x014, 0x02f,  -0x0da, -0x06a, 0x07f,  0x07d, -0x0ea, -0x014, 0x09a, 0x050, 0x017,  -0x00d, 0x041,  0x03e,
+                -0x096, -0x008, 0x075, -0x0bc, 0x0f9,  -0x0fc, -0x0a7, 0x0ef, 0x0f9,  0x066,  0x02f, 0x0d3, -0x0f0, -0x04a, -0x100, -0x0c6,
+            });
+            try testArgs(@Vector(64, i9), .{
+                -0x016, -0x0ae, 0x08b,  -0x0eb, -0x0b2, 0x02f,  0x039,  -0x0ba, -0x08d, -0x0a8, -0x0eb, -0x01a, 0x0eb,  0x0ca,  -0x049, 0x04e,
+                -0x019, -0x0d9, -0x0bd, 0x0ae,  -0x07d, -0x092, -0x0fb, -0x06c, -0x0e6, 0x0d9,  0x02c,  0x0cc,  0x093,  -0x022, 0x07a,  -0x093,
+                0x0e5,  -0x011, -0x003, 0x070,  -0x042, -0x0ad, 0x0be,  -0x038, -0x0bf, 0x098,  0x090,  -0x09e, -0x0a5, -0x0e1, -0x0e2, 0x039,
+                -0x035, -0x0e5, -0x054, 0x04c,  0x04b,  -0x09f, 0x091,  -0x039, 0x09b,  -0x029, 0x014,  -0x0d3, 0x06b,  0x0ae,  0x091,  0x082,
+            }, .{
+                -0x0bb, -0x0ec, 0x0fa,  0x055,  0x06f,  0x011,  -0x09d, 0x083,  -0x066, 0x014, 0x007,  0x002,  -0x0ee, -0x0d9, 0x0c3,  -0x087,
+                0x03c,  -0x065, -0x0cf, -0x075, -0x0c7, -0x0c1, 0x06b,  -0x0e3, -0x07a, 0x0b2, -0x0f8, -0x0fa, 0x001,  -0x0ba, 0x0c4,  0x0bb,
+                0x032,  0x01e,  -0x074, -0x058, -0x040, 0x0aa,  0x077,  0x028,  -0x061, 0x076, -0x04e, 0x01a,  -0x05f, -0x073, 0x0ea,  0x06e,
+                -0x069, -0x0a1, -0x041, 0x013,  -0x01c, -0x0f8, 0x053,  0x0f8,  -0x0c3, 0x058, -0x02d, 0x0f1,  -0x045, 0x04b,  -0x0b1, -0x0f3,
+            });
+            try testArgs(@Vector(128, i9), .{
+                0x0d4,  0x036,  0x0bd,  -0x046, -0x0a7, 0x09e,  0x0dd,  0x043,  0x098,  -0x09a, -0x06f, 0x0bb,  -0x0b7, 0x021,  0x0a3,  0x0f0,
+                0x069,  -0x08b, 0x0da,  0x016,  -0x049, 0x0d0,  0x07b,  0x004,  0x0ad,  -0x07c, -0x04e, -0x011, 0x01f,  -0x035, 0x028,  -0x0c9,
+                -0x0eb, 0x077,  0x08b,  -0x009, 0x024,  -0x058, 0x04e,  -0x0c0, -0x01d, -0x0a7, 0x088,  0x01b,  -0x0f3, -0x0c5, -0x08e, 0x0dc,
+                0x07d,  0x086,  -0x032, 0x0a9,  -0x00c, -0x06a, 0x06c,  0x032,  0x083,  0x0ec,  0x0ec,  -0x0a6, 0x029,  0x044,  -0x07f, 0x068,
+                -0x038, 0x0f6,  0x0b5,  -0x00d, -0x051, -0x0c6, -0x0af, -0x0eb, -0x0b6, 0x03c,  -0x037, 0x0cc,  -0x033, 0x08a,  -0x0b4, -0x039,
+                0x01e,  0x06c,  0x015,  -0x081, -0x029, 0x017,  -0x080, -0x01e, -0x081, 0x04f,  -0x071, -0x073, 0x0c3,  0x079,  0x0ad,  0x087,
+                -0x072, 0x067,  -0x064, -0x0d4, 0x0b4,  0x003,  0x0b1,  0x0bb,  0x0cc,  -0x0e9, 0x0e7,  0x015,  0x07b,  0x0e4,  -0x0ee, -0x07f,
+                -0x0bf, 0x0cd,  -0x056, 0x0ea,  0x0e5,  0x0fa,  0x0e1,  -0x087, 0x0fe,  0x017,  0x071,  -0x0d1, -0x053, -0x088, -0x0ef, 0x01b,
+            }, .{
+                0x0ea,  -0x018, 0x0ab,  0x039,  0x0ec,  0x0cc,  -0x033, -0x0e6, -0x037, -0x075, -0x055, -0x09a, 0x0bc,  0x099,  -0x03c, -0x0b4,
+                -0x0fe, -0x0ce, 0x0d6,  -0x084, 0x08f,  0x04b,  0x0cc,  -0x023, 0x01e,  -0x09c, 0x058,  0x0f4,  -0x0a7, 0x085,  -0x049, -0x050,
+                0x0f3,  -0x036, -0x0fe, 0x070,  -0x0a2, -0x081, -0x066, 0x057,  -0x017, -0x0c8, 0x070,  0x09b,  -0x0e4, -0x03b, -0x0d9, 0x081,
+                0x041,  0x0ec,  -0x062, 0x0b9,  -0x0d2, -0x02a, 0x0ab,  0x072,  0x001,  -0x082, -0x0cd, 0x0c8,  0x017,  -0x09d, 0x094,  -0x027,
+                0x09c,  0x024,  -0x0ec, 0x02f,  0x066,  -0x08e, 0x0ee,  0x099,  0x08e,  -0x0e5, -0x094, 0x0bb,  0x02f,  -0x0fe, -0x07e, -0x0ad,
+                0x05c,  0x066,  0x07e,  -0x0a9, 0x0fe,  -0x0e3, -0x068, 0x058,  -0x007, 0x0d6,  -0x0e8, -0x0d6, 0x038,  0x0b8,  -0x0b2, 0x0c1,
+                0x09a,  0x02f,  0x0d9,  0x07d,  0x0fc,  0x0f7,  -0x005, -0x01c, 0x0c2,  0x066,  0x064,  -0x096, -0x040, 0x065,  -0x00d, -0x063,
+                0x031,  -0x088, 0x090,  -0x077, 0x0e2,  0x0a8,  -0x0e0, -0x077, 0x0eb,  0x0c3,  -0x0ad, 0x008,  0x04e,  -0x095, -0x041, -0x0a6,
+            });
+
+            try testArgs(@Vector(1, u9), .{
+                0x09e,
+            }, .{
+                0x171,
+            });
+            try testArgs(@Vector(2, u9), .{
+                0x0bf, 0x042,
+            }, .{
+                0x154, 0x14b,
+            });
+            try testArgs(@Vector(4, u9), .{
+                0x0a5, 0x1ba, 0x1ef, 0x0b3,
+            }, .{
+                0x15d, 0x1d3, 0x00e, 0x13b,
+            });
+            try testArgs(@Vector(8, u9), .{
+                0x068, 0x125, 0x1ac, 0x105, 0x0cb, 0x14b, 0x18b, 0x07f,
+            }, .{
+                0x04a, 0x011, 0x0ad, 0x1d7, 0x1b8, 0x083, 0x16d, 0x052,
+            });
+            try testArgs(@Vector(16, u9), .{
+                0x00e, 0x0b4, 0x0d2, 0x149, 0x012, 0x17d, 0x13f, 0x1cb, 0x0f2, 0x145, 0x098, 0x005, 0x055, 0x141, 0x115, 0x01c,
+            }, .{
+                0x06c, 0x1da, 0x192, 0x0cf, 0x180, 0x0c2, 0x158, 0x0c6, 0x141, 0x105, 0x168, 0x165, 0x0aa, 0x0d5, 0x0a1, 0x03d,
+            });
+            try testArgs(@Vector(32, u9), .{
+                0x1bd, 0x05b, 0x1e1, 0x03e, 0x18b, 0x1ad, 0x102, 0x1bc, 0x0cd, 0x09f, 0x028, 0x057, 0x0cd, 0x14f, 0x02b, 0x00f,
+                0x140, 0x0b3, 0x155, 0x161, 0x1b6, 0x0ae, 0x13f, 0x1a7, 0x1b5, 0x0d4, 0x1f1, 0x1f5, 0x01c, 0x04b, 0x110, 0x0e2,
+            }, .{
+                0x027, 0x046, 0x00a, 0x035, 0x0ad, 0x10c, 0x010, 0x0ef, 0x096, 0x061, 0x016, 0x0cb, 0x17a, 0x0aa, 0x0d6, 0x1ad,
+                0x108, 0x0e3, 0x078, 0x020, 0x145, 0x0fc, 0x109, 0x04e, 0x13b, 0x02b, 0x11c, 0x125, 0x0f0, 0x185, 0x06b, 0x0b2,
+            });
+            try testArgs(@Vector(64, u9), .{
+                0x17b, 0x094, 0x1e8, 0x089, 0x0ec, 0x15d, 0x190, 0x0eb, 0x086, 0x091, 0x132, 0x074, 0x004, 0x142, 0x136, 0x066,
+                0x0a1, 0x1dc, 0x1d2, 0x026, 0x11e, 0x1eb, 0x1d5, 0x055, 0x047, 0x116, 0x0b7, 0x14a, 0x1ea, 0x067, 0x1c1, 0x19e,
+                0x13e, 0x11a, 0x16d, 0x0a6, 0x1b8, 0x0ef, 0x179, 0x076, 0x13e, 0x118, 0x0a3, 0x04e, 0x10a, 0x1bd, 0x186, 0x170,
+                0x172, 0x14f, 0x15e, 0x0f2, 0x1bc, 0x016, 0x189, 0x199, 0x0ee, 0x1ac, 0x0d8, 0x094, 0x19f, 0x0c8, 0x0f2, 0x06a,
+            }, .{
+                0x096, 0x19f, 0x094, 0x03d, 0x060, 0x164, 0x171, 0x101, 0x1ab, 0x172, 0x14b, 0x177, 0x1d6, 0x10d, 0x193, 0x13e,
+                0x1cf, 0x1be, 0x16a, 0x088, 0x0bb, 0x1bf, 0x052, 0x14c, 0x1fa, 0x060, 0x1c7, 0x073, 0x19d, 0x158, 0x1dc, 0x12d,
+                0x1c1, 0x15c, 0x10e, 0x16e, 0x1d2, 0x155, 0x0d1, 0x0e1, 0x126, 0x0bd, 0x081, 0x17e, 0x1f9, 0x1aa, 0x1ad, 0x0fe,
+                0x0f8, 0x158, 0x0ec, 0x00f, 0x033, 0x053, 0x033, 0x1e4, 0x05b, 0x072, 0x06b, 0x1a3, 0x157, 0x0ed, 0x1c8, 0x01b,
+            });
+            try testArgs(@Vector(128, u9), .{
+                0x13e, 0x0ad, 0x121, 0x0b1, 0x186, 0x0af, 0x058, 0x1b6, 0x16c, 0x0b0, 0x1e4, 0x1a2, 0x1f7, 0x1e1, 0x12c, 0x098,
+                0x0a5, 0x138, 0x1dd, 0x1d5, 0x0a0, 0x01e, 0x01e, 0x077, 0x0a9, 0x0f9, 0x12b, 0x153, 0x0bd, 0x0ac, 0x13e, 0x097,
+                0x062, 0x064, 0x091, 0x100, 0x0be, 0x196, 0x096, 0x183, 0x18f, 0x006, 0x07f, 0x14c, 0x0ec, 0x028, 0x0cd, 0x09c,
+                0x054, 0x0c7, 0x0cf, 0x019, 0x058, 0x0fa, 0x1ec, 0x1c4, 0x0d8, 0x0f7, 0x187, 0x1a5, 0x17f, 0x008, 0x087, 0x199,
+                0x1cd, 0x094, 0x100, 0x011, 0x050, 0x09d, 0x05e, 0x1f8, 0x0a7, 0x0a7, 0x0f7, 0x06b, 0x05e, 0x14f, 0x03c, 0x08c,
+                0x110, 0x16a, 0x08b, 0x1a3, 0x173, 0x1e0, 0x01a, 0x18a, 0x061, 0x0e8, 0x0d7, 0x0a6, 0x11b, 0x1fa, 0x004, 0x1fe,
+                0x045, 0x117, 0x0ab, 0x11a, 0x079, 0x1f6, 0x1bb, 0x0b6, 0x04a, 0x01b, 0x0d5, 0x0a6, 0x15a, 0x088, 0x0fa, 0x180,
+                0x0a4, 0x1fa, 0x17b, 0x117, 0x120, 0x110, 0x199, 0x109, 0x171, 0x1cb, 0x1cb, 0x0f3, 0x127, 0x1b2, 0x0e5, 0x152,
+            }, .{
+                0x137, 0x1c8, 0x1e2, 0x04a, 0x0f9, 0x0a7, 0x1d7, 0x1ba, 0x1a6, 0x035, 0x09b, 0x018, 0x1bd, 0x0fe, 0x08d, 0x029,
+                0x0d8, 0x1cc, 0x06f, 0x174, 0x132, 0x02b, 0x188, 0x15f, 0x036, 0x15e, 0x0bc, 0x1bd, 0x1b2, 0x0f1, 0x193, 0x0b7,
+                0x192, 0x03d, 0x0df, 0x1b7, 0x087, 0x14a, 0x137, 0x102, 0x117, 0x0de, 0x031, 0x03e, 0x1b0, 0x021, 0x0f4, 0x13e,
+                0x148, 0x0a7, 0x19c, 0x11e, 0x0e6, 0x0f1, 0x043, 0x1b3, 0x0c6, 0x1b2, 0x162, 0x098, 0x1c1, 0x0e7, 0x142, 0x032,
+                0x00d, 0x196, 0x124, 0x11e, 0x011, 0x19b, 0x023, 0x101, 0x0a1, 0x1ae, 0x03a, 0x0ec, 0x146, 0x020, 0x0c0, 0x0d7,
+                0x135, 0x152, 0x0fe, 0x08b, 0x193, 0x147, 0x0bf, 0x1c3, 0x0a2, 0x0c2, 0x0f7, 0x1c5, 0x1fe, 0x0a2, 0x033, 0x1ec,
+                0x043, 0x1a9, 0x1f5, 0x151, 0x04d, 0x176, 0x0df, 0x1f4, 0x09f, 0x054, 0x119, 0x0f8, 0x197, 0x0e9, 0x189, 0x196,
+                0x083, 0x1bb, 0x19b, 0x1a9, 0x15b, 0x136, 0x192, 0x08f, 0x0ba, 0x166, 0x178, 0x0c2, 0x0d0, 0x1b7, 0x181, 0x1e2,
+            });
+
+            try testArgs(@Vector(1, i15), .{
+                0x1309,
+            }, .{
+                0x1422,
+            });
+            try testArgs(@Vector(2, i15), .{
+                0x32e8, 0x3d81,
+            }, .{
+                0x195c, 0x13e8,
+            });
+            try testArgs(@Vector(4, i15), .{
+                -0x3485, 0x2320, -0x1725, 0x1e6e,
+            }, .{
+                0x2910, 0x3293, 0x3144, -0x3bbc,
+            });
+            try testArgs(@Vector(8, i15), .{
+                0x1c0d, 0x2f06, -0x0e9e, 0x230a, 0x0a7b, 0x19ae, -0x19b6, -0x2ace,
+            }, .{
+                -0x34a3, -0x342a, -0x0aaf, 0x1ece, 0x12fc, 0x0562, 0x0d22, -0x310f,
+            });
+            try testArgs(@Vector(16, i15), .{
+                -0x0abb, -0x1bbc, -0x3112, -0x23bf, -0x08b5, -0x1517, 0x1586, 0x06b2,
+                0x25ec,  0x3cf1,  0x07ea,  0x3972,  0x09d8,  -0x18a6, 0x06dd, -0x1c34,
+            }, .{
+                -0x0ec7, 0x1144,  -0x1a94, 0x255f,  -0x1fbb, -0x1500, -0x0e4f, 0x0b67,
+                0x1352,  -0x0d6b, 0x2f3e,  -0x086b, -0x19dc, -0x149b, -0x013e, 0x0ce6,
+            });
+            try testArgs(@Vector(32, i15), .{
+                -0x330a, -0x0a40, -0x2533, -0x1e99, 0x1aa6, -0x2587, 0x2778,  0x394a,
+                -0x0383, -0x2fb7, 0x04cf,  0x033a,  0x2bff, 0x3997,  -0x112c, 0x3a1a,
+                0x1adf,  0x270b,  0x182e,  -0x23f6, 0x1a33, 0x2644,  -0x0b41, -0x1c48,
+                0x1c2d,  -0x2a40, 0x007c,  0x1a62,  0x30d9, 0x0e4b,  0x32ee,  0x2b46,
+            }, .{
+                0x1af0,  0x286f,  -0x14fe, 0x2318,  0x002a,  -0x26b2, 0x350b,  0x0884,
+                0x3011,  0x276a,  0x2b2a,  0x22d3,  -0x1ece, 0x0143,  0x2f5b,  -0x0fa2,
+                0x2412,  -0x3d86, -0x3774, -0x09a5, 0x0fbf,  0x32f7,  -0x0a23, -0x3d5a,
+                -0x1523, -0x27c5, 0x097f,  0x2923,  0x3060,  0x113e,  -0x0643, -0x1287,
+            });
+            try testArgs(@Vector(64, i15), .{
+                0x0419,  0x1803,  -0x3897, 0x2b0c,  0x08a3,  -0x39d0, 0x174e,  -0x29c6,
+                0x0152,  -0x1078, 0x1113,  0x23bf,  0x0990,  -0x2777, 0x2ba4,  -0x058b,
+                -0x2d4a, -0x23ba, 0x3875,  -0x1720, -0x2625, -0x1c8f, 0x1f7c,  0x3f73,
+                0x3780,  -0x3043, -0x0d8d, 0x2ced,  0x091a,  0x3481,  -0x1917, -0x352f,
+                0x34c7,  0x322f,  -0x20ae, -0x0653, 0x1c82,  0x09a8,  -0x1a0b, -0x1dff,
+                -0x24c2, -0x2592, -0x3ff7, 0x1515,  -0x3d32, 0x1e9e,  -0x334d, 0x352b,
+                -0x2439, -0x3d0b, -0x2bcc, -0x2d29, 0x197c,  -0x2bad, -0x2682, 0x32cf,
+                0x31e4,  -0x085c, -0x0c84, -0x2f11, 0x03ba,  -0x0111, -0x2634, 0x344f,
+            }, .{
+                0x011a,  0x186c,  -0x2d7e, 0x29b1,  0x2cfb,  -0x077b, 0x3e8c,  -0x3a62,
+                0x3575,  0x35f0,  -0x2529, -0x3040, 0x398e,  -0x0c56, 0x2aa5,  0x0a72,
+                -0x0c36, -0x2c53, 0x275b,  -0x1155, 0x1a9d,  -0x34af, -0x3d4f, 0x14a0,
+                -0x0b88, 0x0b34,  0x2d60,  0x19ee,  -0x0ac4, -0x2f1b, -0x1e20, -0x2d8b,
+                -0x23f4, 0x0472,  0x1977,  -0x33f2, -0x301d, -0x1931, -0x1abe, 0x307f,
+                -0x2dcb, 0x2e99,  0x0dd1,  0x0377,  -0x3f91, -0x3719, 0x0248,  0x3c40,
+                -0x08d4, -0x2f12, -0x12ee, 0x3bc0,  0x3c4a,  0x1ff3,  -0x1096, -0x37e0,
+                -0x0879, -0x354f, -0x2277, 0x1ced,  0x0833,  -0x0f7e, 0x2070,  0x0d81,
+            });
+
+            try testArgs(@Vector(1, u15), .{
+                0x18c0,
+            }, .{
+                0x0c85,
+            });
+            try testArgs(@Vector(2, u15), .{
+                0x3697, 0x744b,
+            }, .{
+                0x60d5, 0x4172,
+            });
+            try testArgs(@Vector(4, u15), .{
+                0x7c31, 0x62c3, 0x7fe9, 0x4a52,
+            }, .{
+                0x28bf, 0x58a9, 0x09d5, 0x111f,
+            });
+            try testArgs(@Vector(8, u15), .{
+                0x3be1, 0x1928, 0x227e, 0x7ab4, 0x7e26, 0x4761, 0x586a, 0x4665,
+            }, .{
+                0x11b8, 0x4079, 0x39eb, 0x79d2, 0x7871, 0x5a40, 0x793c, 0x4a66,
+            });
+            try testArgs(@Vector(16, u15), .{
+                0x30fe, 0x6781, 0x6db6, 0x16f7, 0x736f, 0x1dca, 0x122e, 0x4e43,
+                0x41d8, 0x5b7a, 0x183b, 0x5036, 0x6a3a, 0x4301, 0x6c05, 0x5e7f,
+            }, .{
+                0x7dd5, 0x0897, 0x7f63, 0x0375, 0x5d05, 0x74c8, 0x0bc8, 0x6ac2,
+                0x5063, 0x335a, 0x283c, 0x452d, 0x6274, 0x2531, 0x1f90, 0x05c3,
+            });
+            try testArgs(@Vector(32, u15), .{
+                0x122d, 0x54a6, 0x7cf1, 0x5b48, 0x47e3, 0x6918, 0x0d81, 0x6074,
+                0x06d3, 0x0951, 0x40d8, 0x52db, 0x6258, 0x13fa, 0x3fe0, 0x0cdc,
+                0x6c69, 0x4fa8, 0x7bc7, 0x66e7, 0x1417, 0x368a, 0x46fc, 0x1850,
+                0x2a1d, 0x2622, 0x3877, 0x524a, 0x64b0, 0x6391, 0x2f16, 0x5b7c,
+            }, .{
+                0x4c22, 0x7689, 0x57ba, 0x04b5, 0x2720, 0x081e, 0x25e4, 0x3f89,
+                0x3065, 0x2d1e, 0x0386, 0x0f0c, 0x740a, 0x5fa5, 0x6b0a, 0x1fda,
+                0x2b3c, 0x5e71, 0x77c5, 0x3e29, 0x6a2e, 0x147e, 0x79a1, 0x77f6,
+                0x4bdd, 0x7fb1, 0x632c, 0x3898, 0x3dd3, 0x78b3, 0x75b9, 0x4960,
+            });
+            try testArgs(@Vector(64, u15), .{
+                0x2bb1, 0x0225, 0x151a, 0x056c, 0x0655, 0x3f5b, 0x5fea, 0x000a,
+                0x4f56, 0x7e08, 0x20b4, 0x4f64, 0x0da1, 0x74a0, 0x11b7, 0x38c7,
+                0x7a25, 0x6608, 0x50a7, 0x79b8, 0x5444, 0x4cc4, 0x110d, 0x1cf0,
+                0x5a2e, 0x4462, 0x03dc, 0x785a, 0x2d1c, 0x4592, 0x1855, 0x14c6,
+                0x2c4d, 0x7ae3, 0x7b45, 0x6cb0, 0x197d, 0x6fcc, 0x269e, 0x6f98,
+                0x7527, 0x7895, 0x0259, 0x2b3f, 0x181a, 0x5f50, 0x401d, 0x54d2,
+                0x2acc, 0x0aa8, 0x6822, 0x5d64, 0x3459, 0x5823, 0x4e62, 0x395e,
+                0x339f, 0x0b56, 0x25b8, 0x0c30, 0x5b3d, 0x7005, 0x0411, 0x074d,
+            }, .{
+                0x155c, 0x6c07, 0x5880, 0x1766, 0x661b, 0x5cfd, 0x1fb9, 0x67e1,
+                0x617c, 0x2bb4, 0x251b, 0x7ace, 0x4940, 0x584b, 0x708c, 0x3849,
+                0x0cdb, 0x3204, 0x4667, 0x7bee, 0x3279, 0x4c74, 0x7561, 0x2d6f,
+                0x5676, 0x530e, 0x39a1, 0x7c05, 0x1b23, 0x7bd7, 0x25ce, 0x7e97,
+                0x56c0, 0x0d59, 0x17f7, 0x6fed, 0x3b0e, 0x7470, 0x52a4, 0x5da3,
+                0x17c8, 0x2a51, 0x031f, 0x5879, 0x22bb, 0x674e, 0x3a55, 0x13a2,
+                0x1fef, 0x1cd8, 0x5067, 0x6602, 0x3d5b, 0x2f5e, 0x4b7f, 0x6cfc,
+                0x197d, 0x5afc, 0x4254, 0x07de, 0x6b37, 0x07d5, 0x4435, 0x0b29,
+            });
+
             try testArgs(@Vector(1, i16), .{
                 -0x7b9c,
             }, .{
@@ -18317,6 +19735,228 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
                 0x2c02, 0xff5b, 0x19ca, 0xbbf5, 0x870e, 0xc9ca, 0x47bb, 0xcfcc,
             });
 
+            try testArgs(@Vector(1, i17), .{
+                0x0538f,
+            }, .{
+                0x01de0,
+            });
+            try testArgs(@Vector(2, i17), .{
+                0x0cb5d, 0x00c0b,
+            }, .{
+                -0x0ef1d, -0x0797c,
+            });
+            try testArgs(@Vector(4, i17), .{
+                -0x06cbb, 0x08fcd, 0x05d91, -0x05824,
+            }, .{
+                0x0714b, 0x09218, -0x0c0d8, 0x000dd,
+            });
+            try testArgs(@Vector(8, i17), .{
+                0x0d8db, 0x0c58a, 0x09110, 0x0d637, -0x0a7e5, -0x00bc2, 0x08ffb, -0x0cf79,
+            }, .{
+                0x0a1ce, 0x0b491, 0x0aff1, -0x0b794, -0x085e7, 0x05c84, 0x040bc, 0x0f21f,
+            });
+            try testArgs(@Vector(16, i17), .{
+                -0x0ccb0, -0x04d27, -0x0199e, -0x06dae, 0x0b1a1, 0x05324, -0x0edee, -0x0e52d,
+                0x042d2,  0x06121,  0x0241f,  0x06833,  0x0a33b, 0x0f526, 0x0671a,  0x0c2a3,
+            }, .{
+                0x02be2, -0x08589, 0x0d95c,  -0x001cc, -0x03183, 0x08c1a,  -0x001db, 0x07604,
+                0x08d92, 0x094ad,  -0x08aa5, -0x0b495, -0x0d6cd, -0x0dff1, -0x027f1, -0x0214e,
+            });
+            try testArgs(@Vector(32, i17), .{
+                0x01222,  0x022bc,  0x042df,  0x02205,  -0x06de8, -0x0aaaf, 0x0fa4c,  -0x0c708,
+                -0x06edd, -0x0acbe, 0x0b01f,  0x003f5,  -0x0b82a, 0x0a189,  -0x04f4b, 0x02122,
+                -0x0debd, -0x0b05f, 0x091b6,  -0x074ff, 0x054e5,  -0x03355, 0x08ab0,  0x0c3c8,
+                -0x0f488, -0x04304, -0x0168e, -0x0224a, -0x0cbaa, 0x0ac99,  -0x0f096, 0x0e064,
+            }, .{
+                0x0d1c0,  0x02f93,  0x0e28c,  0x0862d,  -0x09e1e, -0x02247, -0x01b56, 0x06633,
+                0x0fdcc,  -0x0731f, 0x0e084,  0x0b865,  0x089ac,  -0x09e31, 0x0c730,  0x0af1d,
+                0x0c9b2,  -0x0bbbd, -0x0f0a4, -0x0aba7, 0x0e593,  -0x02c83, -0x04e28, 0x0f375,
+                -0x0e805, 0x0390f,  0x042a3,  -0x02aed, 0x03a5a,  0x070d3,  -0x0ed6a, 0x02b14,
+            });
+            try testArgs(@Vector(64, i17), .{
+                0x0be66,  0x0e4fb,  -0x0b918, -0x029b8, 0x019e8,  0x00621,  0x0e380,  0x040f6,
+                -0x0d095, 0x0b4d8,  -0x0a3ad, -0x0eaf2, 0x03bd3,  0x0635c,  0x02444,  -0x0830f,
+                0x01239,  -0x037ed, -0x071d1, 0x057e7,  -0x02cdb, 0x0504c,  0x0612c,  -0x005bf,
+                -0x04793, 0x03909,  0x0061c,  -0x06423, 0x040d6,  0x0bc6a,  -0x09204, 0x0e890,
+                0x04b98,  0x00257,  0x0dc85,  -0x0af2b, -0x0a1e7, 0x04ff6,  0x0b680,  -0x07c61,
+                -0x0eaff, -0x0da01, -0x04b21, -0x0088d, 0x068a8,  0x06b52,  -0x0d619, -0x09344,
+                -0x09b96, 0x0b81e,  0x04df8,  -0x012f6, -0x0c3bd, 0x067cc,  -0x0fa47, -0x05e93,
+                0x07d29,  0x00d87,  0x0de1f,  0x0d24f,  -0x0aede, -0x03414, -0x09a6c, 0x094dc,
+            }, .{
+                0x03d8e,  -0x0f297, -0x0d810, 0x05b8e,  -0x0630e, -0x0656f, 0x02f56,  0x0190b,
+                0x0a1e6,  -0x0783a, -0x00bde, 0x01bb2,  -0x093a5, -0x02b3f, 0x0198c,  0x0cc55,
+                0x04ec1,  -0x0ed31, -0x00a80, -0x0be6d, 0x0712b,  0x0451b,  0x067a4,  0x061cd,
+                0x0e799,  -0x06c74, -0x09b05, 0x0dc73,  -0x0a87d, -0x0cf60, -0x00f07, 0x0b101,
+                0x06d5b,  0x09d61,  -0x01092, 0x002ee,  0x0f192,  0x0024b,  0x04778,  0x06d05,
+                -0x0e460, 0x08524,  -0x0ba27, -0x0611e, -0x0d944, -0x0a3de, -0x0c278, 0x015e5,
+                0x071fe,  0x016d5,  -0x076e2, -0x035d8, 0x02763,  -0x0676f, -0x0a9aa, -0x0ab0b,
+                0x012de,  -0x00d05, 0x0f528,  0x07837,  0x0fc4e,  -0x06304, 0x0616f,  -0x0b10d,
+            });
+
+            try testArgs(@Vector(1, u17), .{
+                0x17ba6,
+            }, .{
+                0x1ac3a,
+            });
+            try testArgs(@Vector(2, u17), .{
+                0x1d26d, 0x18548,
+            }, .{
+                0x0c0eb, 0x1bbc8,
+            });
+            try testArgs(@Vector(4, u17), .{
+                0x1a01c, 0x12671, 0x175cc, 0x0ed36,
+            }, .{
+                0x141d6, 0x1b2bc, 0x1c2b9, 0x1eb18,
+            });
+            try testArgs(@Vector(8, u17), .{
+                0x0cb1b, 0x0f5ce, 0x1eba1, 0x04fdc, 0x0510f, 0x02c4c, 0x09310, 0x132df,
+            }, .{
+                0x1b732, 0x0b446, 0x048a7, 0x04c58, 0x03a0b, 0x19346, 0x07688, 0x1d4d5,
+            });
+            try testArgs(@Vector(16, u17), .{
+                0x1e6a3, 0x0eae5, 0x1065a, 0x18766, 0x1b70a, 0x1605b, 0x18256, 0x1e254,
+                0x0d926, 0x0f023, 0x1d9de, 0x14549, 0x051dd, 0x1e89e, 0x0baba, 0x00f38,
+            }, .{
+                0x1e050, 0x0f727, 0x1dfef, 0x151a6, 0x05593, 0x04a79, 0x1c54c, 0x147b6,
+                0x07173, 0x0480b, 0x094a6, 0x105ce, 0x0540c, 0x19d78, 0x15501, 0x1133a,
+            });
+            try testArgs(@Vector(32, u17), .{
+                0x0d98a, 0x1c869, 0x12b2b, 0x1fc00, 0x00b1b, 0x1c7b9, 0x09dd0, 0x1b560,
+                0x1f409, 0x18cdf, 0x04275, 0x07da6, 0x069e5, 0x12aa8, 0x0513a, 0x0dea5,
+                0x00df4, 0x1f8da, 0x0df92, 0x07885, 0x1c4d7, 0x14e64, 0x09648, 0x040cb,
+                0x04fc6, 0x122cb, 0x1022d, 0x1bbd5, 0x0fd59, 0x1978f, 0x17d5a, 0x06299,
+            }, .{
+                0x0086f, 0x023b6, 0x0d964, 0x0e90b, 0x1bd4b, 0x18f58, 0x09f26, 0x0a831,
+                0x00c03, 0x03ad1, 0x01c05, 0x1aded, 0x1d300, 0x12529, 0x14124, 0x1e684,
+                0x1b40d, 0x09328, 0x1a3b6, 0x1e492, 0x00f2a, 0x13b51, 0x1606e, 0x1d7f1,
+                0x0a5e6, 0x04172, 0x1aaea, 0x1e96f, 0x1c3ae, 0x11494, 0x06aac, 0x01dee,
+            });
+            try testArgs(@Vector(64, u17), .{
+                0x1b753, 0x10620, 0x0c1de, 0x1fa10, 0x118bf, 0x0a549, 0x06b32, 0x095dc,
+                0x177a2, 0x0aee7, 0x0f2cf, 0x118e0, 0x0b694, 0x0f270, 0x00917, 0x0048c,
+                0x1d903, 0x1de14, 0x10aa2, 0x06885, 0x1bba1, 0x0a5c5, 0x19373, 0x01355,
+                0x153f9, 0x18b94, 0x0e8a3, 0x0cc07, 0x0a014, 0x0f9ed, 0x02d95, 0x18388,
+                0x01de4, 0x0c8fa, 0x15858, 0x0ff57, 0x1fc97, 0x18d83, 0x11836, 0x0f136,
+                0x0d4e3, 0x1742d, 0x09f22, 0x088cf, 0x134f8, 0x1b9a8, 0x11fd2, 0x18428,
+                0x17411, 0x146e1, 0x0edea, 0x1d57d, 0x04059, 0x18b93, 0x10fc8, 0x01cd7,
+                0x12d54, 0x0cb27, 0x04fc3, 0x0d479, 0x0202c, 0x0cfab, 0x11e82, 0x000e7,
+            }, .{
+                0x0122f, 0x06698, 0x0c704, 0x012de, 0x0e36c, 0x0d81b, 0x00d34, 0x10ad6,
+                0x1f156, 0x00fca, 0x1f869, 0x1d14b, 0x13165, 0x1e11e, 0x1e60c, 0x00d18,
+                0x164bf, 0x1881f, 0x18a59, 0x14f13, 0x04ef2, 0x0e2a7, 0x021b0, 0x15884,
+                0x1ac75, 0x19969, 0x1353d, 0x073ec, 0x190ef, 0x1c777, 0x14b19, 0x12e43,
+                0x1b93f, 0x06daf, 0x02a1f, 0x1a801, 0x0facc, 0x132db, 0x13fb2, 0x00791,
+                0x11f11, 0x0ebc1, 0x0a376, 0x10e6d, 0x0321c, 0x154d7, 0x01180, 0x0cce1,
+                0x1a449, 0x0383b, 0x0d5bb, 0x0e5dd, 0x07e94, 0x08f78, 0x1c681, 0x1a146,
+                0x170db, 0x0da34, 0x1bd7f, 0x07a96, 0x0a017, 0x0b946, 0x0f98a, 0x0e9e5,
+            });
+
+            try testArgs(@Vector(1, i31), .{
+                0x2b94a60e,
+            }, .{
+                0x20451023,
+            });
+            try testArgs(@Vector(2, i31), .{
+                0x21d4d18c, -0x1f73454a,
+            }, .{
+                -0x18dcc667, -0x2e81b7f1,
+            });
+            try testArgs(@Vector(4, i31), .{
+                -0x1d8f56b6, -0x2beae9b1, 0x3d488b10, -0x14ce8669,
+            }, .{
+                -0x03a922a5, -0x0ea0c434, -0x029db0c1, -0x3b8d64f3,
+            });
+            try testArgs(@Vector(8, i31), .{
+                0x0ffaa22c,  0x15914f94,  -0x20cec195, -0x35e7b06a,
+                -0x1d212622, -0x2bb576e4, -0x0dede257, 0x1cc1066e,
+            }, .{
+                -0x178ffdb4, -0x10934a93, 0x08c3b058,  -0x1579a89f,
+                0x2340c302,  0x00280e85,  -0x38983c31, 0x0349891e,
+            });
+            try testArgs(@Vector(16, i31), .{
+                -0x11bc6f72, 0x1ca1ca00,  0x0b49c711,  -0x07fd7d21,
+                0x20ab59d2,  -0x07f45e94, -0x0d33151d, 0x065b8bff,
+                0x2231354e,  0x21ff00a7,  -0x35061bb0, -0x1135899e,
+                0x2ed1c690,  -0x1c1b598f, -0x19157726, -0x11c4d2c7,
+            }, .{
+                0x304dbbfb,  -0x3e59fd39, 0x1029151a,  -0x1e4d2063,
+                -0x3e164c14, -0x35fb3d09, -0x22070b0d, 0x1b730749,
+                0x380ae142,  0x357f1b30,  -0x17ccaa0d, -0x32cd12b4,
+                0x305256f7,  -0x298ce473, 0x244faaf4,  0x23450241,
+            });
+            try testArgs(@Vector(32, i31), .{
+                -0x2e3a0d66, -0x0df709be, 0x3bfd8b3f,  0x2a4f2d06,
+                -0x2b7ea7af, -0x28016bef, -0x34a3b4f9, -0x2dfdded7,
+                0x357e8c45,  0x0434b6b9,  0x28a3c5f9,  0x2d5b9944,
+                0x316614a1,  0x0c12a228,  0x0422665d,  0x33c0dec9,
+                0x0a7ede17,  -0x02e88ae9, -0x39e76560, 0x1e4b90af,
+                0x0a1527bb,  0x3a9f0405,  0x163b6eae,  -0x3ff84429,
+                0x1eb85fcc,  0x265f1f44,  0x2536ec34,  -0x30c952a2,
+                -0x1f7864e5, 0x033737cd,  -0x20b5718a, -0x0aad3a2f,
+            }, .{
+                -0x2455af85, 0x210b1040,  0x39915c7d,  0x2d56c08e,
+                0x1f318b8d,  -0x1e125926, -0x3faaabbb, -0x254d4da5,
+                -0x1b2ded0f, -0x27fa4874, 0x02c0d73b,  0x123e9344,
+                0x0351c023,  0x14cca255,  -0x2072b9d7, 0x1e624059,
+                -0x07d014a1, 0x2eda3228,  -0x300ff9b4, 0x333f25ad,
+                -0x3c653e21, 0x04b4a50e,  -0x20f17e80, 0x29063cd1,
+                0x2d52f6ad,  -0x0b2cdb6b, -0x2e4c9778, 0x303ded7c,
+                0x397162ee,  -0x2aa6708b, -0x0ef146b4, 0x04f36039,
+            });
+
+            try testArgs(@Vector(1, u31), .{
+                0x3ed1fb2d,
+            }, .{
+                0x1b75c3fd,
+            });
+            try testArgs(@Vector(2, u31), .{
+                0x38754d45, 0x04a454d9,
+            }, .{
+                0x7d06646d, 0x228e6c44,
+            });
+            try testArgs(@Vector(4, u31), .{
+                0x725a3790, 0x43680c3d, 0x058a6acf, 0x76172c1c,
+            }, .{
+                0x77fa9932, 0x7354fc00, 0x1756db7a, 0x559bf7c1,
+            });
+            try testArgs(@Vector(8, u31), .{
+                0x375a41f8, 0x761db971, 0x1c633348, 0x556c2682,
+                0x2478e967, 0x4fc61f7d, 0x0b0c0fbc, 0x3989659e,
+            }, .{
+                0x2cd6c7c1, 0x518c1da4, 0x2a52dd59, 0x0a9165dd,
+                0x5f2a31fd, 0x04dd2dba, 0x6eb0e7f6, 0x078c7a78,
+            });
+            try testArgs(@Vector(16, u31), .{
+                0x4d4ae18f, 0x3f131977, 0x337240bd, 0x4461dafc,
+                0x36bf5c5f, 0x527cca5e, 0x788a765b, 0x51da84b2,
+                0x58afe262, 0x289694c8, 0x7f3dc333, 0x05f123e9,
+                0x49182e11, 0x05ec0bb8, 0x0a760c6a, 0x4e74999f,
+            }, .{
+                0x107f6e90, 0x38d44d8e, 0x4b3adb3c, 0x7d6c21c0,
+                0x3ec0863b, 0x72422c85, 0x45e72de4, 0x07fc07d3,
+                0x7e30044d, 0x3ee5687d, 0x34037d8f, 0x1f3e1e71,
+                0x77aec6b0, 0x02db5151, 0x697fe49b, 0x49f9ad57,
+            });
+            try testArgs(@Vector(32, u31), .{
+                0x3b815f8c, 0x01c443d3, 0x22f036bf, 0x3d86e477,
+                0x3f631301, 0x51df4ff2, 0x7edd9a1c, 0x1b8d97fc,
+                0x7758837d, 0x23944d5a, 0x6b6fe951, 0x1cea3c27,
+                0x27033a47, 0x00b7643b, 0x407e47c9, 0x6004a994,
+                0x2efac78c, 0x22720791, 0x4308438b, 0x7776b2be,
+                0x139db08a, 0x4d9068a5, 0x4e26c811, 0x5e05d0a0,
+                0x0a651f83, 0x7f7a1fcc, 0x6b0f3eb0, 0x3467ea73,
+                0x4827410b, 0x3e48eece, 0x73a3abf5, 0x212b7737,
+            }, .{
+                0x13031751, 0x08fb38ec, 0x4aff2c4e, 0x25046a42,
+                0x0e9e35bf, 0x27349249, 0x54067ba1, 0x5a229b53,
+                0x6e68895f, 0x74f3d476, 0x6584d407, 0x0ef73f77,
+                0x2473e0ce, 0x3b936b7c, 0x2cf9dd51, 0x7100aa6b,
+                0x6dca745e, 0x739f6346, 0x32407063, 0x40de144d,
+                0x3dc73803, 0x3afedeab, 0x56cbbfe7, 0x4273c6db,
+                0x7b2eeb85, 0x6bf11881, 0x4e8148c7, 0x7b8daec4,
+                0x75c63050, 0x0001d08d, 0x7f14dd77, 0x13f23338,
+            });
+
             try testArgs(@Vector(1, i32), .{
                 0x7aef7b1e,
             }, .{
@@ -18423,6 +20063,208 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
                 0xf080e943, 0xc8718d14, 0x3f920382, 0x18d101b5,
             });
 
+            try testArgs(@Vector(1, i33), .{
+                0x0a9a3088e,
+            }, .{
+                0x06c76b26e,
+            });
+            try testArgs(@Vector(2, i33), .{
+                0x0a9bd1d56, 0x05b0b9015,
+            }, .{
+                -0x05af6217c, 0x0227b5d3a,
+            });
+            try testArgs(@Vector(4, i33), .{
+                -0x0405ee2ea, -0x0ff2c72eb, 0x0817f6727, 0x09093b663,
+            }, .{
+                -0x0ffdf18ee, 0x0956db821, -0x01ed194af, 0x059e085e9,
+            });
+            try testArgs(@Vector(8, i33), .{
+                0x09d4fea1c,  0x0cd4254ba,  0x008d5f732,  0x0566c6f55,
+                -0x01c2e54c3, -0x0469292fe, -0x00ba9ba6f, -0x076670146,
+            }, .{
+                0x02c01d901, 0x04407fcae,  -0x0e6a223a6, -0x0bd9499f8,
+                0x0f9da76ed, -0x07483b289, -0x0bfc2d58e, -0x078b3055e,
+            });
+            try testArgs(@Vector(16, i33), .{
+                -0x05a738cb0, -0x0be006f3e, 0x09271a365, 0x039d2f00d,
+                -0x0d502b660, 0x0dd465278,  0x042a7e451, 0x03c1c3671,
+                0x00eb6f4a9,  0x08982dbc4,  0x0421b8852, 0x015ee0e53,
+                0x0e6924014,  0x0c6ddbc65,  0x00260ea59, 0x0d98aaedf,
+            }, .{
+                -0x0d285a53d, 0x0800f42de, -0x048e48809, -0x052d65f47,
+                -0x0bda689e0, 0x0bc437a1b, -0x05cc595ba, 0x04b335861,
+                -0x0f5ec6456, 0x0580ceda6, 0x06f0e76c9,  0x0b0064ff1,
+                -0x0eae28371, 0x075c3c6b1, 0x07c8d26dd,  -0x06af4f476,
+            });
+            try testArgs(@Vector(32, i33), .{
+                0x07b887609,  -0x004a23b00, 0x09b664a97,  -0x0d4932ed0,
+                -0x01a63850e, -0x0a4298efc, -0x01e409b55, 0x01452fb7a,
+                0x03d12175b,  -0x0463cd854, 0x0cf448f1d,  0x0d1d02e2e,
+                -0x0da681c00, 0x0d1173267,  0x08faa4e2c,  0x0634c9df5,
+                0x037e682e2,  0x0db055022,  -0x0641f3daa, 0x053852c9b,
+                0x035822a2b,  -0x0b12bfe53, 0x084f704c9,  0x018cfacee,
+                -0x07130725b, -0x0b301dece, 0x00e1765b3,  -0x0e0f0c97c,
+                0x0ccd5e7fd,  -0x0ee60c481, -0x0c918345b, 0x04b2c6ec3,
+            }, .{
+                0x0df3d6e88,  0x00b4748ff,  -0x0d0381c05, -0x093d68cb5,
+                -0x027834cc7, 0x05aa9ca20,  -0x04bc88f40, 0x080f0d937,
+                0x06699a6b8,  -0x0fed64f1d, 0x0a79fe089,  -0x016a9c385,
+                0x0186e6b5b,  -0x0a3c83fe6, 0x09a4f87ec,  0x011ce03bf,
+                -0x0f742cb8c, 0x066be2e66,  -0x03b0beb52, 0x059bfda10,
+                0x04bc221c0,  0x07d8b0344,  -0x0c6e34f34, -0x0de0338ce,
+                -0x09571f80c, 0x0d36e8ea7,  -0x052c44147, 0x0072ce503,
+                -0x0ef8dec64, 0x0b5956cb3,  -0x02b72b4b1, 0x0f2585167,
+            });
+
+            try testArgs(@Vector(1, u33), .{
+                0x197ead992,
+            }, .{
+                0x0be595917,
+            });
+            try testArgs(@Vector(2, u33), .{
+                0x1485499a5, 0x1e12b23e3,
+            }, .{
+                0x1431cd300, 0x0762a7b51,
+            });
+            try testArgs(@Vector(4, u33), .{
+                0x00d6f907d, 0x19a2c1e5e, 0x18a597564, 0x0bea832ed,
+            }, .{
+                0x004f8c83b, 0x18fd5422c, 0x1b02cb79b, 0x092af8ba2,
+            });
+            try testArgs(@Vector(8, u33), .{
+                0x100a8bdce, 0x182aa3624, 0x0a0523393, 0x0cc8b944f,
+                0x0797fe181, 0x19c2ef2f6, 0x1b43977a0, 0x1513a878a,
+            }, .{
+                0x10da86327, 0x16e25c8c1, 0x036e09027, 0x1d85d870c,
+                0x0ff720340, 0x07d3901ec, 0x03df35db0, 0x0b3e4a05e,
+            });
+            try testArgs(@Vector(16, u33), .{
+                0x1c323b838, 0x03e15bdff, 0x0d11e109b, 0x152199f53,
+                0x1f3fc1542, 0x0e7b471e0, 0x0d291cc97, 0x1f5576bf6,
+                0x1c64d5f2e, 0x1468c9947, 0x18f1bb596, 0x0250829ac,
+                0x08d1b66a0, 0x1102178a6, 0x03eaf21e6, 0x1d0012275,
+            }, .{
+                0x11bcb3f84, 0x13150388c, 0x0e41a521a, 0x1c6c23e22,
+                0x130ac516c, 0x02d3a49c2, 0x1dd028aca, 0x1b83e56ef,
+                0x161d93875, 0x0a0fcb218, 0x1d27943a8, 0x09c919906,
+                0x182582997, 0x1c2acc0c7, 0x1cb8a9324, 0x0f456f948,
+            });
+            try testArgs(@Vector(32, u33), .{
+                0x1819f161e, 0x11b0c6f8b, 0x10e54ef82, 0x0f56ffe99,
+                0x1c128ddba, 0x0c70e8d84, 0x15e26011b, 0x1ed2f16e4,
+                0x1c498769a, 0x1b3a95b06, 0x0580ebb27, 0x16ef0aa01,
+                0x00a5a7986, 0x011a5fbf1, 0x092059f35, 0x065d9a218,
+                0x18b3c3508, 0x1f8a52f0b, 0x12a0c771c, 0x15c566333,
+                0x0882ec701, 0x0856047ee, 0x06974b33a, 0x049a97da9,
+                0x103730040, 0x0fabaaafc, 0x08e6b9887, 0x12e97722d,
+                0x00a2e302f, 0x144df5d90, 0x1dcc2f7d4, 0x1b6a6c079,
+            }, .{
+                0x13e4aa8fb, 0x1ff2fa13d, 0x0fd3d4549, 0x10837c43c,
+                0x1db62d7c2, 0x0e92f9f8b, 0x10c7ee602, 0x0e010e5f6,
+                0x1b216ca4f, 0x15808c554, 0x1ff8df1f7, 0x0c30cb60b,
+                0x191d83ae9, 0x17dc4326a, 0x1ff1e287e, 0x12e08bb58,
+                0x17787d83b, 0x074306807, 0x0ad4d40f7, 0x157b2e8a1,
+                0x1830cc0d0, 0x18e688eec, 0x1f87405f3, 0x19443ff22,
+                0x16ebfdd93, 0x07bb98b57, 0x01cd6f301, 0x08adbcc33,
+                0x1ffbcb919, 0x007455180, 0x1edbabfcb, 0x0b5519b97,
+            });
+
+            try testArgs(@Vector(1, i63), .{
+                -0x2d99033c3223ad4f,
+            }, .{
+                0x023c8c6807737a0e,
+            });
+            try testArgs(@Vector(2, i63), .{
+                -0x08fe3255607ce099, -0x3bf678cfa16a59d2,
+            }, .{
+                -0x1b8733d130c49d54, 0x39deb4fe6c836b3f,
+            });
+            try testArgs(@Vector(4, i63), .{
+                0x1e81cf3e0f9eae80, -0x0886f09bd1723b08,
+                0x16e84b8d985e5b82, 0x0fa327538c09a281,
+            }, .{
+                -0x2594908bb49f963f, -0x29639632db767665,
+                0x012d5330f966e1be,  -0x1143fddd48bf9752,
+            });
+            try testArgs(@Vector(8, i63), .{
+                -0x08e352cf330c1852, -0x17bc1f760120ff85,
+                -0x0f180e5c748c0e20, 0x07ee9290e2d53335,
+                -0x33945ea070fbb445, 0x104802af8984525d,
+                0x36d27ad0f35fcfd8,  0x292141a0133227a0,
+            }, .{
+                0x2adad30092da2886,  -0x1694bcdda9b82c45,
+                0x1f5a019d638ba22c,  -0x2e7853134888b613,
+                -0x2bb77a420f280a6d, -0x377771e94e493751,
+                0x1dd5373311160f2f,  -0x02bb5248b7e0c55e,
+            });
+            try testArgs(@Vector(16, i63), .{
+                0x2d930d47aa078416,  0x1edf9abe8d562bd5,
+                0x3ef3a5266f822396,  -0x102f82f23c5608e1,
+                -0x38755dccf6c87ae1, 0x09f11b107d033f85,
+                0x079829e968213db8,  0x17248ef600ddb53d,
+                0x19e16a7a4e6aa0cc,  0x11e21ddfb7b5b946,
+                0x26ad3768e80b1258,  0x3672a14b31cb7f1a,
+                0x3235b83f829966b4,  0x3b4009ac38f728b8,
+                -0x205d4b6cd8a164ad, -0x2fc581f11fa0eb42,
+            }, .{
+                -0x1428b8c4947715ea, -0x0a5626024843736e,
+                0x075a1a0d0d47f0af,  0x0ea460d282e8dcc0,
+                -0x124b2a6e2957dc53, -0x0d2602075af449d5,
+                -0x0db76b825400293c, -0x17c13cd693d2db13,
+                0x0ad907bb94e64687,  -0x05fd33e10be897ad,
+                -0x3210cf60aa544f0b, -0x10f80c3ee6d7c510,
+                0x0106683b57f2cf9e,  0x353bea8a4c199155,
+                0x3942af4e40b65cb0,  0x3da0254a739aa17a,
+            });
+
+            try testArgs(@Vector(1, u63), .{
+                0x17beef25621255fb,
+            }, .{
+                0x79bc7e82d16c5e15,
+            });
+            try testArgs(@Vector(2, u63), .{
+                0x58aeb180f136af8b, 0x0ed5f2cdb8ffe659,
+            }, .{
+                0x5df7aae04a4a1126, 0x53568966decbd14f,
+            });
+            try testArgs(@Vector(4, u63), .{
+                0x3d50a0f4755d87e3, 0x722e93c0b1355665,
+                0x3c8325a3e3640be1, 0x34eef2706884b9ab,
+            }, .{
+                0x2671797fd253520d, 0x22f81938e525536e,
+                0x08ca256b0b348d57, 0x1cdbe1867f422280,
+            });
+            try testArgs(@Vector(8, u63), .{
+                0x49ab492d75830041, 0x28f4065197f361cf,
+                0x3fcebb5fe8968a08, 0x0ab4e3fd7b158803,
+                0x7f517fcfc0451068, 0x05eaa5d2f93407b2,
+                0x0fe06447fdbec4d6, 0x34862504232f73d8,
+            }, .{
+                0x64a1796fe76dcc4d, 0x159bd1a9228a8c41,
+                0x093a4794b5759276, 0x40a740fa4d288585,
+                0x5d2d1aede616f40e, 0x7e7af17ddce8e03e,
+                0x7555fb4a1c18d5ff, 0x11b45e151e8724d0,
+            });
+            try testArgs(@Vector(16, u63), .{
+                0x456de088589c1035, 0x23239ed26d0e198f,
+                0x4f3c4ae380a12430, 0x1cb11ef73131a6f4,
+                0x6e51a370969ec7ae, 0x38bed7b267bb163d,
+                0x2fcfab012fb79669, 0x45e203406a43fe95,
+                0x38468cff64a44f74, 0x3cc86f1d717e8c60,
+                0x2ae5f2a7c73c6c2c, 0x0c8856138b43dff8,
+                0x1a7493c9bb7b265c, 0x6e8536e5f32317d8,
+                0x634701c32688fd34, 0x7a4e4a7f35ef9651,
+            }, .{
+                0x3da82f0beb7a091d, 0x040c9bbf428787fa,
+                0x795418c55742e8d2, 0x700f9b62c01cdf30,
+                0x78d567c18e7ce16d, 0x300da37dc14b6705,
+                0x68bf0e06ec9054ca, 0x2e45a80bcd5dd30d,
+                0x00e8c13b3acf4557, 0x19adb837145a0267,
+                0x594889dd8e1ff4c2, 0x561da6bd7e2ba593,
+                0x6a8ed2f67f586604, 0x2ce6d9d2663cb1fc,
+                0x68ec40831cb6b863, 0x2862d922ed7a78eb,
+            });
+
             try testArgs(@Vector(1, i64), .{
                 0x4a31679b316d8b59,
             }, .{
@@ -18519,6 +20361,188 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
                 0xed533d18f8657f3f, 0x1ddd7cd7f6bab957,
             });
 
+            try testArgs(@Vector(1, i65), .{
+                0x0ca0853f57c0686c8,
+            }, .{
+                -0x05c79d7369ef879fd,
+            });
+            try testArgs(@Vector(2, i65), .{
+                0x0c65f685f4839bc8d, -0x079057ad04859d897,
+            }, .{
+                -0x0dbb1951a67a71fc6, -0x0d4763ead1d5f66aa,
+            });
+            try testArgs(@Vector(4, i65), .{
+                0x0d6a03163f101695e,  -0x0ebe991e54e61156d,
+                -0x0715adf48176985dc, 0x01e57dbe6ea50b22f,
+            }, .{
+                -0x0b308d8311a45a38d, 0x07c292cc15044b1f5,
+                0x0e69e3eae81046bc8,  0x053b75d6a544ca0db,
+            });
+            try testArgs(@Vector(8, i65), .{
+                0x0066315a88896ba00,  0x026c8109f087eb4e3,
+                -0x0b9928ad2e41d98ef, 0x0fc5ab9c89a8ee6ff,
+                0x0dcdd248c4575dbb1,  -0x09db7d03c38a83255,
+                -0x097bc9d5397c57594, 0x0e6af866eed43b462,
+            }, .{
+                -0x06ac0448a06876866, 0x0f89dbafcbbb065d7,
+                -0x02b88b31ed9fc24dd, -0x005c56246687ed4f0,
+                -0x085a4a7b09dcec260, -0x0068e92e14823a98a,
+                -0x0ac4a04dd6de87eb9, 0x0716ed52ef9704b71,
+            });
+            try testArgs(@Vector(16, i65), .{
+                -0x09fd6493584cf3a50, 0x0e6dc4b5655cb9d36,
+                -0x03b55c156ac2bdcf1, 0x002cfefc233d5bcb8,
+                0x0dbebb830228d2945,  -0x02133deab2ebd8699,
+                -0x0ff9bc10c14c58c6c, -0x09170272ba214dabc,
+                -0x06ed685bdc535a55e, 0x0c12e1ca45cf7be9a,
+                -0x04e1094b79391df4a, 0x03b3fbc230416592d,
+                0x08799db1379e6b1ba,  -0x0fa7c7aed60863358,
+                -0x0c44dd44a770610bd, -0x0349cdc54719b2e37,
+            }, .{
+                0x03146cdf203a80cfd,  0x0e22a03fe80f3e2ed,
+                0x07fa7a66dbe252222,  0x000a3bc923a32648a,
+                -0x078bac9e36d66da71, -0x03055804cd3b73168,
+                0x0d9280808858f006d,  -0x09415dfb2fd33fe5a,
+                -0x01bb25a93961b763c, -0x0d7b9f64e9b0c5c82,
+                -0x096d7b6ee9a0b1e11, -0x0358047a2c33fc157,
+                0x0ac0128bbf7a5200e,  -0x009e0b2ab770e711b,
+                0x05473b5629f372ee9,  -0x02eda67313ff7fa47,
+            });
+
+            try testArgs(@Vector(1, u65), .{
+                0x1879059aca94dd383,
+            }, .{
+                0x0051da1f25078e919,
+            });
+            try testArgs(@Vector(2, u65), .{
+                0x18f39bb41f03223f5, 0x16a59f6838a63e737,
+            }, .{
+                0x105aa15beae036a1a, 0x1b47ef7ef744b70fe,
+            });
+            try testArgs(@Vector(4, u65), .{
+                0x18c685254b3c7170c, 0x0de8048a66902ebfd,
+                0x02bc97f62163e7e31, 0x152b6eba67c1e76db,
+            }, .{
+                0x1f45ab5e13037f07b, 0x1a6ac8ec084a115ee,
+                0x1db62793f956492ba, 0x0e4262599ec54c2a4,
+            });
+            try testArgs(@Vector(8, u65), .{
+                0x07c1dbda5d0ddd69a, 0x18f5741ef462a799b,
+                0x1fd2f93384860df65, 0x01827fcdb6c715d64,
+                0x03869c173a922b018, 0x0addd48a4671a2f6b,
+                0x1eee0f78995f9f118, 0x1e1d0d6b2396bcf38,
+            }, .{
+                0x159e9494fc84ed452, 0x0834f6aaa7666a22b,
+                0x066765389e84150b4, 0x1e722ae23908c7e96,
+                0x0d64ec725397e6ee0, 0x19f3a147a355baa22,
+                0x02f1b100538b6dbc3, 0x175885a34aefca91b,
+            });
+            try testArgs(@Vector(16, u65), .{
+                0x105647e12b2b76daa, 0x04dcca29537263f6a,
+                0x16c112620be731a4e, 0x0d6c088da3c158fa0,
+                0x02ff8ce4fc8331ec4, 0x127a7d10ab851980c,
+                0x05703068045915d95, 0x07cc42e0bb216b310,
+                0x08a15a16e4247ad98, 0x1c17b2292e34aa369,
+                0x14c9808748fa615c3, 0x187449666c2f5375b,
+                0x133fcb93a31d2f369, 0x047729af594c8c1c1,
+                0x1ce798ff51a064ad2, 0x0800a3c18b944f0e4,
+            }, .{
+                0x0d65b8b643703ef96, 0x1c55c2e0816c5d056,
+                0x0390a06d3ec60e632, 0x0d543d346db055847,
+                0x017e27c7d663d7005, 0x112f7b98a78014ea3,
+                0x030136142f19042d7, 0x059f9b6e576f79ef7,
+                0x1dd78fb3577c5ed37, 0x1a4594314b3f1adde,
+                0x1e26cd964c656292a, 0x0579c10261478da1d,
+                0x0406f2849ab5ad15c, 0x024b15c729f2211b7,
+                0x10f0505cc2f7f110c, 0x133cfa11f995e0afc,
+            });
+
+            try testArgs(@Vector(1, i127), .{
+                0x226b8faf65414a9a0ffcd438c7fa9eea,
+            }, .{
+                0x2c582610b08531ca208fef1c2b839bdc,
+            });
+            try testArgs(@Vector(2, i127), .{
+                0x35e8caffc9fc8e1b3b6d3667cb6a128c,
+                -0x070d99d51807ae2314ea61e4f0166145,
+            }, .{
+                -0x3a59d011c2a385a6dcf00a40efd85b77,
+                0x3b5f506c3fa0c8552fba624d1b5debec,
+            });
+            try testArgs(@Vector(4, i127), .{
+                0x191594cc2356ffeac739f1841b06adcf,
+                0x1f45176996076de7c3891f14f831e192,
+                0x1c9047002e0a4f00656556fffeb50349,
+                -0x38049f47bcc36ab26600bc475295389f,
+            }, .{
+                -0x2f8c1618324c60c40f65d216943d59d2,
+                -0x21429f90bd9dff7b5d49c9d7f2655928,
+                0x3baeef72d0d168fb50564c9f6eb5d778,
+                0x3affe6f2eddfc6a69206c357633d0eeb,
+            });
+            try testArgs(@Vector(8, i127), .{
+                -0x00c48629a415129f66e74a1a215d683a,
+                0x3920a425cbec4c9af649a00d5747136a,
+                0x11f53b9db15a12814c948c6a809b96f8,
+                -0x1f7e272db97efc88762dedf54978e795,
+                0x13e56ca8ed41f64d04ef01019703f402,
+                0x294014109f4313454d9994f1003b4572,
+                0x0f3d6fe7adde96149ffcc5c0808b708f,
+                0x2d9bcd407da37ff3d43cc5f6b64fd385,
+            }, .{
+                0x3f95f21d3bc39fe8e3fcc184d150a984,
+                0x2a8c36a5986d8c245bbdd302737b7e29,
+                0x37c61446e10efe6a94f797da05a28fae,
+                -0x096b2f4e16aef099d623066e941d13d1,
+                0x3df11d4af3229ed59c52628cac02f506,
+                0x326f78cbf454566daa3bc235a1fb3fb8,
+                -0x2cf6b4872dffea018c77892e433b6784,
+                0x345dfdd52635c224c70949913255ab68,
+            });
+
+            try testArgs(@Vector(1, u127), .{
+                0x49a0cd1849f9adbed215770c6f97a584,
+            }, .{
+                0x5e10826a03aeb57d4a9a9ef2a8f02faa,
+            });
+            try testArgs(@Vector(2, u127), .{
+                0x1c2544fbb76890de5c00f42c9a516846,
+                0x324f292d72694d409152a311b5a0441c,
+            }, .{
+                0x01cc87106db8e357e85f875d46feac96,
+                0x49e775cc0db88cf9725af13113d7d457,
+            });
+            try testArgs(@Vector(4, u127), .{
+                0x3e82ddcd074646a0a489a4fd300c32c9,
+                0x2a511ac041c17a68c5a71bc6d3cb3ba9,
+                0x2dace4189083411634b753ae476579a8,
+                0x4e1d5cb04d9681d806312d72d6dc5262,
+            }, .{
+                0x5f489e689ff15fcf38aad995b1796af2,
+                0x49ee549bd8e20092c8ccebb992cde8b8,
+                0x4e52d33281cba3fda6ae8d1f463c7a1f,
+                0x0de0279b2dec3fffe44c1c7decc430f8,
+            });
+            try testArgs(@Vector(8, u127), .{
+                0x636de193fbadb1984a0ed9969f88d38d,
+                0x64426b7e468cb323b1d75656879fb9b2,
+                0x48afb4cc5a11f2ca4b8609b057758312,
+                0x176157ce93422bb4463d6f0dda275b94,
+                0x746015d0e8cb5e36af43840a6df11aab,
+                0x279b665776118bc2759134c19cbf1bb0,
+                0x52cb4dc56d3935090fb7db710c8f9660,
+                0x591884d8d8e2fe2f77b7f8508dddeaaa,
+            }, .{
+                0x42e00bc05d50ea63d546085642b8831b,
+                0x4241ff07ce99ee055b48ed2939b8d6b7,
+                0x45a5f53a5c5cb13f1a9e6621fec8cf4a,
+                0x68d6938c1b348dc6cc98d4b6ab3a1c22,
+                0x1f9448fc11e38500ec7ecf57a33a278b,
+                0x7b331526d6fcfb958f3c88fc4656e123,
+                0x4f1e8ddf41a7105cc1a1c815040a2693,
+                0x31ac7bd68686d531d53ecca75e6d8b81,
+            });
+
             try testArgs(@Vector(1, i128), .{
                 -0x3bb56309fcad13fc1011dc671cf57bdc,
             }, .{
@@ -18605,6 +20629,398 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
                 0xa858bce5ad0e48c13588a4e170e8667c,
             });
 
+            try testArgs(@Vector(1, i129), .{
+                -0x09c126c4e31389b174cb2b45e76c086dd,
+            }, .{
+                0x0d023f2566c56400f4b7edb9c4f364ecb,
+            });
+            try testArgs(@Vector(2, i129), .{
+                -0x072be32e116741732f9422d9b20777db5,
+                -0x010829386cc3d93283a83591bd994ca58,
+            }, .{
+                0x004671167d0681d46945832d16f70cfba,
+                0x098550137341cbadfe8378987e3a83265,
+            });
+            try testArgs(@Vector(4, i129), .{
+                0x04167620c7f5e094208fb204f06b9792c,
+                0x07a6690c0b5f9de6c8955873f19b98f23,
+                -0x06505997d01e5a971f820742210c7adc6,
+                -0x07013921eab8d92559f17ad9e3712bc61,
+            }, .{
+                -0x0bdbf7ce4079fa628feb05f5814402e15,
+                -0x0e6ee5464de547ce92ee5a6037f175e52,
+                -0x009d079b88cdc765ab72758854b753d98,
+                -0x085af31dd5243f61b46e6dc950728ff49,
+            });
+            try testArgs(@Vector(8, i129), .{
+                0x0d85f471bdc56611e9187113f1489bd38,
+                0x06e1e9cae044bb17b63c380fd6f2652b5,
+                0x0921d649288a16481f2ec69747f443e5e,
+                0x0e8db24d1aefd91cfab20bac61f560274,
+                0x0187abef7578e5e02a396e4ebd5859c23,
+                0x0d2dec5a6bd72afe18288ca428ec2b94d,
+                0x0c52c111a077bffb1fa4483523cd044c3,
+                -0x0b6a90c79d6230f271bf625c9a6c0dfc5,
+            }, .{
+                0x0d47c566bdedae2e9485f7aa381a98d30,
+                -0x0e5d9573c01a9e56361b202c47f0e51f8,
+                -0x0bdcafb1f08db4920121521b2d2679931,
+                0x08de42eec43f2a5175f928f8a1812575d,
+                0x0c3f57e712a7f8494e51b62d11573d9a0,
+                -0x09287910d439c9bafd56bc8a6faf50cb3,
+                0x07b494af9634c8f95bed2f50fcaf08dda,
+                -0x0a150693d83af2e823dcc0765e4e331e2,
+            });
+
+            try testArgs(@Vector(1, u129), .{
+                0x1e51bd13747df8fae52f0471e9f1ad3f2,
+            }, .{
+                0x00aba9f2c80c71b1ed07201486ea44d00,
+            });
+            try testArgs(@Vector(2, u129), .{
+                0x0c7967a19d13d4bd3f6ebfa0f8fdeab92,
+                0x01614ee2c32757cc92e2efd97e94321a7,
+            }, .{
+                0x175803a2c0bf888bd01e2a5bae28e4523,
+                0x07d05a98ec8f4e7e72aac2712bc09a23a,
+            });
+            try testArgs(@Vector(4, u129), .{
+                0x1b18b768c5852bb1f0a7b34ceaeac4a0c,
+                0x038fc1f995d9378fcf1043598810d7a56,
+                0x06eebd77f9920e79932decf0f29ef658d,
+                0x1f31a766145022050d0b16a4f5d06f1e3,
+            }, .{
+                0x033b20a528a722b6704bf5c8aea497e68,
+                0x0ea05276fb8de4e77e4a8d4ef55f64bba,
+                0x1c632cf252683b00faae6dcb7d73d8b90,
+                0x11a7e1e98bb34fc7f5ed36c327476b122,
+            });
+            try testArgs(@Vector(8, u129), .{
+                0x1b85991584b26492938854a6f6953b766,
+                0x119472b0f4b7199f1267639f601833e3d,
+                0x130d19e6ccfebce09c61c8c8fb526fec0,
+                0x1379f4fa9d25e18ef5138c193b7ec9ef3,
+                0x15fc62d9e21c2c0c63f9cab5ab0a8cf36,
+                0x123ceb2a65f200a0b3e559c801bdbfa58,
+                0x0e740ae3c7ab0cd24c5ff94d9367e3ebb,
+                0x008c7dd67796949390355d866e4f171ef,
+            }, .{
+                0x0ffdda009048cf61476610425d5e55560,
+                0x061bcb9d024d015891b5666e7b317cc84,
+                0x060c013386fb8c129a3bd65b7909f2bc5,
+                0x01c8efdd7ea806b1dc984c6183da53d8f,
+                0x02fae7cee43d7f97448e82b907335bd45,
+                0x0365e7cadcbd0a64decc1377339967c53,
+                0x1a52793ba8a9689e1d7f5036e8cb613e1,
+                0x1257d7cdd8f04058d285a5bce173b5262,
+            });
+
+            try testArgs(@Vector(1, i191), .{
+                -0x23ce1d6b12e301d243024c60aef14b6d068e1d4b1c4f442d,
+            }, .{
+                0x1a7b04f33aa590b99c1162bb32e7681ec267b7826e162512,
+            });
+            try testArgs(@Vector(2, i191), .{
+                -0x25ed7b5b9a9cd517f7f1e2e796f8c5a28ae04af6c1e7bfb3,
+                0x3f298efedf4269276db60344c2bed556ee25d24c5f887000,
+            }, .{
+                0x3c92b38c6c6eb449c25011a4248df259b8452293390d8ad7,
+                -0x35743fe2fa86686c496ffd16f93f2fca1ed742b67b8722a7,
+            });
+            try testArgs(@Vector(4, i191), .{
+                -0x1743b70ad78994b68b5f7ad6066447ce8aefaba4852af2c7,
+                -0x10974462633ebb5631bf83d65ffde3239ff4029c9f62b7ef,
+                0x1783dc9f0afa815051e6d338ef35da013e807da475815af5,
+                0x3f32dd0689a9437b7acc4290920370205825a3f6a4453916,
+            }, .{
+                0x2ef1b4f92da40e0c33a1611f48f25d8f1c4208b5396e51dd,
+                0x0d1e8846fd1a2055386c4d74aa55914ef085196399964a29,
+                0x272c9065c4395186cf7c164fc5d43aeee7fdd08bfd98ad86,
+                0x25438380cc92dba5c9f3d57ef311fad8e14e3a3c18910546,
+            });
+            try testArgs(@Vector(8, i191), .{
+                -0x36d4fd6afdc4bae957b5caed3360891ac4c44383f77e0225,
+                0x2bd89636510607345813f74ee303d3b22435b37a7d09c7ed,
+                -0x3cc372c1659789b967ed7000bfae20f73829a093e9ac9232,
+                -0x2e27691dc3d8ea12093136cf859ef8e0c686ae66e56e073d,
+                -0x24150a1f8bb0097625000af9b186a8927f70993e76702c17,
+                -0x3adebb65a71f180fbf21117bc38fa3aeddebe1e216ae5a70,
+                -0x257688d6ef63e2046bb8bfa11ba84b665e4e12522d56d085,
+                0x173a71a3792e72cbd31f2f10142cf568ff0987e8dfd4ed35,
+            }, .{
+                0x056e3bbfe91bd5adf441278fccaeabbe86cba8ae545dee95,
+                -0x02705836e891812aba265ee3e12d17f6fcdbc69320e52ee9,
+                -0x276a852ba9a4163fe636cf31007b5e3cb087bc44c0948a02,
+                -0x14053135d82f772dd82e7a472315b6b9fa836a27bebcf980,
+                -0x153f064d28f4fb27c827bb5fe2a44c95e3b54d21eba40a1c,
+                -0x30ac5165fa3f132f294a5241d3201973a0671aa3b536e3ae,
+                -0x2ce2be59f487892c58bf667a55724b117e3e8ba3b00c1c31,
+                0x138bb8e5b837ffd06d296ea0af9f533ffe3a36073fafc2d3,
+            });
+
+            try testArgs(@Vector(1, u191), .{
+                0x24f26e4c0b4b20639550564263b0be7083e112cdef6c2d83,
+            }, .{
+                0x020334928f4583d332043b7aef15e832047dfd01b2933038,
+            });
+            try testArgs(@Vector(2, u191), .{
+                0x67074e5315030f5c1a4d035100736de691b5589f6d349162,
+                0x0e655397f96e3fd66b317294a37975d36478242fd6392259,
+            }, .{
+                0x56bb6bd23999b9af6230833f3c661fd54fa2f012673f88bf,
+                0x005c4fd0feb22f04ae76cd1479b6f0d6e62b76b249073cfd,
+            });
+            try testArgs(@Vector(4, u191), .{
+                0x793af4124df65a0815c1f1603d309d0094e1ee29a571ec57,
+                0x32163b39813a85f3c4ca626d14130258782eae8704f2ceb1,
+                0x64a0ac41f560153b3d1f3193faa818db7be2c66f0dbc2457,
+                0x195e210a66b88674fcb13b0bf26190442b71bd53b2df52fc,
+            }, .{
+                0x2668ee718a179e0fa57a1b72aa6a5dea00b60d8fe69a019c,
+                0x447df595f31d0bac74b0dd7a379fbaecb92b2aba1b8615f7,
+                0x0c794ae8369677209d714dd97092b5e4d0c6b9e794e6d9b9,
+                0x2c7a81900b3eef44190b5d0264f9bf072588720247f69693,
+            });
+            try testArgs(@Vector(8, u191), .{
+                0x1afd856825f4458371e2adcbdd7c1b1dbb935b8a6f4a6dca,
+                0x738efc41846cf88223553ed8b4c1b45366088bece1fe8052,
+                0x7841b17a1c38e4066376e5ae204959c02c524635740a9013,
+                0x4ea41d7c910dca17242c74dfb1aedbaf05c9b93fee1e5b77,
+                0x5c6ba91cfae1f4d49cb2b5cedfc090f0ffbde9afa6794788,
+                0x4582bfd463bdf1f5ff4da91fc65bc2a38823f45c05bc485e,
+                0x7cbac126c09224be8017d7d1c9a84014d2af0a3afb14a5b3,
+                0x3068064c244d43736c5454d45b576f4c62324dd5aec39e8a,
+            }, .{
+                0x49907c71f5e8fb9626727be7a949ae8ee47bfc3658b09614,
+                0x5a8f50d921fcdf9d0a20ff050ecbd2447cd3ab7ea3c4d9fc,
+                0x11bfcb2033ef38914dd35cf384f22d5ece33c1685616ba90,
+                0x1996b77370fef92a696ccd5e316eed50b2bc33ae10b245d7,
+                0x1aeacf955748c195f4ead3b032cafce35db2cf253f4065c1,
+                0x6661ba1be480a8ca7cb1ed35c01ec591c34ed15524412434,
+                0x0d357b77a9e3b924156e18bcf41a83620246ad419a9ce9c2,
+                0x467f70d48dfc54fa6cc3ba3f98e66cd1410283f1ec0ae934,
+            });
+
+            try testArgs(@Vector(1, i192), .{
+                0x5348bc83a6f352c931d06b9817458273408470e8ce312bef,
+            }, .{
+                -0x1e9beca133a9c8cde4b5dae26e198968806ca966c6b8c07d,
+            });
+            try testArgs(@Vector(2, i192), .{
+                0x468c1d585cd2e9549f9ead72889e80fedd3eb95a2ee8d869,
+                -0x1c0276cb4b61b673492f493520098742edfa913d455c4f5b,
+            }, .{
+                0x4a503063a094c7e29c2ca37b6c686e16da8d921aa89c05be,
+                0x4c2141b98f3736551e6e08f1d24eb864842753a8c7112ef2,
+            });
+            try testArgs(@Vector(4, i192), .{
+                -0x62a9e282152d85abe46e802c276f18b0542871984cd7bacf,
+                -0x3f01fc751a8bdef2e7a884d7984d6189b36c46048de3035f,
+                0x2eb5379125d8169909e66f4b6bc903c5f6c92952872bf2fc,
+                0x308053e7f5da3a2906cf5b48ab3752a5820cb90ab56d58ce,
+            }, .{
+                0x6bcb4f6375a8ee60b73240550c89edbe0976acd548588d06,
+                0x1b63863c7a8871c34b2eed238ac4747508150578f60fd993,
+                -0x2bc4a2065ebda452635e3c9cd420f9c01f869bf7abe75255,
+                0x6093a6f058b4da28e5f64ebe7684a5d34e2ed48c98bef114,
+            });
+            try testArgs(@Vector(8, i192), .{
+                -0x41d1d3675490a69b9455f1dd57f0c6a7c5e88e734adee8d9,
+                0x03c5f5e58058b7a4a9038ac6717a1b70caf3851d017ed2e7,
+                0x1829fef2dd242cca51c638d69b51e00a6e7847e79df6117c,
+                -0x4622b84d4b52f94d9c933ef3ac435968b1c0b1b3ac1d07a3,
+                -0x1bb5b63f2ad4dbc0c0090131116680074a83f51f79d1af32,
+                -0x6b75cf9bc97f9b012305b718bbc0672f86543a245c363297,
+                -0x43eb0ec7a8995f1340977273858be8f3d620b503a5931574,
+                -0x1dcfa8f4475abd395fb4e8f696cb25625de768b2d5cb0464,
+            }, .{
+                -0x685420e7656ef93e813658d4eef9d44cb0acab0560894da6,
+                -0x191a07e070edaa52554347726d2c0e1d701b52dd462b716a,
+                -0x6a336f271fb3fa7cc33a441e6a0d9bba741d56bc5f83b113,
+                -0x6313298dfc0492db682940661b6ae1a2f56159663f4ba525,
+                0x35b1588853218d6e00c358bff9d9bf86e399a8b5e3db2b67,
+                -0x6bb7cfcd5f78cc6437544a271922eba4fc64c25d4a8a0732,
+                -0x1a40ab85aabcabc56a4e5523c38f5e184aa5c81d9cdc93f5,
+                0x74438ae4acd2fa943409ad7b87feb48a467a845041aa8d21,
+            });
+
+            try testArgs(@Vector(1, u192), .{
+                0x2058bb0137e3c0cf5c4afb9a17d6ca0646594ceadb5a041d,
+            }, .{
+                0xc1a1bff0426a68458ea170fba78d09a9fe172a5a3609e8eb,
+            });
+            try testArgs(@Vector(2, u192), .{
+                0x693ffb174c224e1139c22e38405ad42e96d229c3fd7a8af2,
+                0xce93932e25a8f26d8f3314dd0a56868ee899eeddb321da8d,
+            }, .{
+                0x69b7060f45d65d5d71ffd171aeebb3aaab8a3b313426f9b0,
+                0xd2953b80f910619b3e0af7d65fe8f840b055f8690b3b7a5e,
+            });
+            try testArgs(@Vector(4, u192), .{
+                0x4770c1f64c87afde65aec11764deed53f9a2d533875eb2be,
+                0x40b75e355dc0b2962e5ce23a5b990642371d9f6a80b133bd,
+                0x99c6d4c37fe86bd4d207fc56822f7ff6e8dfbda5f9d71256,
+                0x43d7f6d2a18f88224c447e88848ae335cb58f3122d36de74,
+            }, .{
+                0xaa5d91b484e03b2dc31fc09b69192c265155f978e1ec2294,
+                0xe474f9f62162317d3115396d50a33753b6b709cd3a06f5e5,
+                0xeab9e1fc9c5da4e6e676ebf7cb0d871e9633d738928b8134,
+                0xe493f9557f2e1eda644ee3a5055c912db265c302d588e2ec,
+            });
+            try testArgs(@Vector(8, u192), .{
+                0x3a38fe23afa76c5ac5f8e0c0b27c70d0ca17c8e184033066,
+                0x28104df9a858c83c6301788c0058fd6c58f7e62f0b735099,
+                0x040f1bad46838cfd6bd5d269415512f7fd129b8322e944c4,
+                0x5cc3d6202e8efb4d769535a20db0876c5142ce975cc175e3,
+                0x24211e6d3db188ef22afa6ab4a382bfebb0520b76562bcf7,
+                0x41c2ba9d1d085d99ffd58fd992c9508ed6cd975441710d0a,
+                0x09e6ae22e1869faba24973fc0f686f4310b06c8da2b8b9cb,
+                0x8ae0677c9dcdbe674d252a0c985a81bf9bf32001e2e8cc7e,
+            }, .{
+                0xe10819f5757f84ae79f934711e8cdcd6aac2848c9af0744d,
+                0xf0264ebb8882377dff3f82dcdb9dadfa2350d058be09933e,
+                0xa47708aedbc8100ef25a6de5eb01d7cd5f98074f69e1227b,
+                0xd0145ba1e2d64d053034e2beed2f47179bbafb8b90b5e5f9,
+                0xde348762a0ce1397ad611e84921a7f6f7e0683fd733b9a09,
+                0x4de07b2982883977940773c44ea2a2e2cbf1e1db94fc832d,
+                0x5af700739769e9ce05217e76638edf92a169bc4c3ee7542c,
+                0xa58a80368b056aa1987f504ff1261cff3f5ffc4474ab48a1,
+            });
+
+            try testArgs(@Vector(1, i193), .{
+                -0x0ccbf216d85fdae8d3a7ee2e7746bc6cc3aca87f3f8067f66,
+            }, .{
+                0x0906ac1fd6d47d6c762db9fcc02fa2987ee0c4d66312b79d5,
+            });
+            try testArgs(@Vector(2, i193), .{
+                0x0f3a3fe06faf1140bbe4eb5e0d0268edb529918745ef3ec9f,
+                -0x0609f53b7f095f629a4b358fb0b77332a89dc203ac0cf28b6,
+            }, .{
+                -0x0188251925a3542ea0d568901a19d67796b17339e9b73f88a,
+                0x00f9037c3dab02483b14195ab2ef737090f187b90ebee13e9,
+            });
+            try testArgs(@Vector(4, i193), .{
+                -0x072798ac04a8ca26a36f587412c9bad03e855fd2049ba72ac,
+                -0x01a8dd5d7cc5ae4f3342ea2af61c4349bb777e9d14108eeda,
+                0x0516fc9ec2e14cd7a5e27c0ea83826082c1097fe35f9f006a,
+                -0x06bd00e1b3a8aff93618d16bcec743ad577e379a15eee0a72,
+            }, .{
+                0x0bdf3724329572c17cf6b82f7011daf08bf56bc28acfe650d,
+                0x08049ade287c9661565a18c5bf57d487cdfb5c111033ba199,
+                -0x0917208e354d2765f0944bc9f50836d4bfcdafabfe8e2442a,
+                -0x0a354ea6608f71254d97e615dc45495e0bcccf05466a2db2f,
+            });
+            try testArgs(@Vector(8, i193), .{
+                -0x027bd975ae44664a34f52fd844ada5c23e4b0fa5586a274ec,
+                -0x077e3522a9cb0c6aa10beeb1430e0ac356a48e90f6466c233,
+                -0x0d0d02b7ab7876460c3d4d3118bef476e2c5ddae50af453d5,
+                -0x065c502be097b15379af566a20c4ee93893718330c9a258ce,
+                0x08b5e11e5ff7422f25c99ab70c216eb77e35f269b5fb739a9,
+                0x0d827f83698f1365a5140e1d38072571dac4169c2124ee1d6,
+                0x062e1435b91faafeecadee5551c61dd01f0f132f56e849973,
+                -0x0dbcd14bffb526c9f3286428e5eb273785128417f05336ce0,
+            }, .{
+                0x0a99c2d8d15184fcdaa5b68180f4e10b0c9fcde51d213c257,
+                0x02065bf7e8d093371795f7f376e2fb3ac857a05a3e1f6befb,
+                -0x060f3874a6f65b46738d0117db38b749aff45725000c06213,
+                0x06019cbc9b1466b7669690ae9c6f095257d1c8874e5e27353,
+                0x0bde8419e795cbad708c76ca90de50c00b585c44d78b2ad68,
+                -0x02e271e95d11dbe0cb77c8b829c739bc00e6c9b2f7b532a29,
+                0x0318a691ab34dabf1804facf1b773a9fb6f9e9fd9e63d985e,
+                -0x04d151cd85a0bc49683bbdba28e2410292dc5648335819fae,
+            });
+
+            try testArgs(@Vector(1, u193), .{
+                0x16d3f2329dafe80b17fdff6bfa688636ae9b5ba311b276f3a,
+            }, .{
+                0x15f95cc088cd4d4cabf9de25cad565d1880515c869da2866e,
+            });
+            try testArgs(@Vector(2, u193), .{
+                0x1a40ffb1b6f0762c975581a45d9b6811b96eda8cdd6eb497c,
+                0x1b010ecf0ba30ca695c39cce25d82142f4c67aa1f8ae39947,
+            }, .{
+                0x0851339a84dee70faac763dbd9cbf884c09f011f093846f20,
+                0x16be7f51577c5cfcd423ecdac1bd0edd1fbce6755d7b20cac,
+            });
+            try testArgs(@Vector(4, u193), .{
+                0x10e099200fef4497ed4be9e11269a254ee4f7fea53ae1e360,
+                0x16cef44c285c1d6364cefe22db70934fba8d31dcbcefe2699,
+                0x074c07744bd683a1a5ec77bcb1ac4cda8b840d5b6e5da9852,
+                0x063c5565fbb29b63de719fb574657d50af454f33fb79f59fa,
+            }, .{
+                0x09cd6adfaaf1b9ed0fd6370049a2227088d9b834b74412a3c,
+                0x0162bc69f5c0da662e3862ea235ea46819b737de31d258a45,
+                0x111191f24663c0a425bd31ead4496b4693a089d6bd6082a11,
+                0x127c9d184e79d0b80a87d9acd7fb79d93fd9b08ff480acef9,
+            });
+            try testArgs(@Vector(8, u193), .{
+                0x171ea804993233dcf14ec91b9185a1520416bebdbb2f0f4b3,
+                0x0f74e7e8f4b4759d22de5120dfb4db57205990a899ab3698b,
+                0x094e8a7bf8cf2a802d3a79f77d3932e8a65d11378fb6f8ece,
+                0x06d07aea60256fcb65c306b32ace35809b45baf5fca21efd1,
+                0x17df00d2223a949f6119a3c8cf0cd87165ed48f93ad0eb921,
+                0x173b7201aa391de8d236ff30ed8ae82b7be2ff5de51285361,
+                0x02a95d9fae7d18739a93e174d9ea1b2f108bd2f5997dfa42f,
+                0x0bffa67d4ac0f4bb56cd4fec9ea53d47896f05fd4efc0f6e0,
+            }, .{
+                0x08485a8df517ac7bf9d02ec8d0c34c246dad221f1ea51596b,
+                0x0986d1fda1bca4b83da2cace25768e4d91d89028889443cfa,
+                0x1faa463b7389740db0cf14b3274f1f955536c6a929c89df82,
+                0x072b004463fc7c58f3ae51d7d5d44ca208205ad3396fa6c8b,
+                0x1ee3603ef444b40e5ef74b6d79f3433648fdbfb918d50e4b7,
+                0x1612fe2493d3e02b2fd38ef7664aa9fb079db6843bb201100,
+                0x0d1a4e0c9596afd18e6f06c04ec3fb1bfb660133fc0c7f7aa,
+                0x0de0220f630f0ac5a699cb36d7ced9da11b272dd2eb66e96e,
+            });
+
+            try testArgs(@Vector(1, i255), .{
+                -0x228071e5036248a576fc6f30bda6553bc4f08505cd4fd272e681ddd1a551db11,
+            }, .{
+                -0x14c82347566b6d8eb19064009a7ef16e2d08cd6e40b4bf34f1e6723ad9b0d625,
+            });
+            try testArgs(@Vector(2, i255), .{
+                -0x39313c0014d4850a7957418b2fdfc83a0c29c5f04d90ccd634d7b4e52ee6aef2,
+                -0x096abeb3fdc6451052a96557657d917e9128765c256f83403f788992dd0cd486,
+            }, .{
+                0x2765779b4ffc6c405173fe64f621af1c7d63a91ab3fe5809d066fc428f630c47,
+                0x206df1159f268d4fd99dc8d2228718189161d7095f0af64c7dee86a34a7b875f,
+            });
+            try testArgs(@Vector(4, i255), .{
+                -0x2c31b98911222faeed03f6625c8a75e0ce5fa53be49e79a26695dde5610e28e4,
+                0x38506947b5f5e5ed4cf3f0738140bb988af9e7cf514862861ec7259a8426b4c1,
+                0x01ba20e69c07a1e8845b1cc837d8d588480a2a52b15f0a5532c763f91f3dad9a,
+                0x1877f8233a0a96c33ea2aed47e3388f8961d4a81dd6c8c1a48c77aefe1b7ed6d,
+            }, .{
+                0x021a3c326f7841068338bbc9ee73fba9b36050156f2a6d3b44ecfdf2273496dd,
+                0x0865f2eb5a35c85c480880b26a9a03f51e0f4cd9bbc2b8ab2755f2aadf1cf0e6,
+                0x0d41a6c3956465b187286d95a42d42033f593be4bd681e757f1154a0735b894a,
+                -0x0b87a6b415579cd9889321b01ad8d2b722dc6c932cf7aa97a0c8c807be5f6d68,
+            });
+
+            try testArgs(@Vector(1, u255), .{
+                0x49ee4da820d884bb3693fb576d5b2f16c9f064ba1da5a81838911813a6445dc9,
+            }, .{
+                0x0afbdec22d512f0d88a95d179e6fc901c7f682be0746ce9acfca17b748543381,
+            });
+            try testArgs(@Vector(2, u255), .{
+                0x293f83519c238a446748193388e0ab75567a03a458b4873f4c2b16b9250f15ff,
+                0x1f0f33f7c2d6fc271ae497b6e3b7a7c6fdff096a321843aebd6d07d7f3050bef,
+            }, .{
+                0x788c4bf8d34d23eb8147a7f36ab2d09a96be0f4bcaa6be6816447e9e6e39d0f8,
+                0x2fec35d0092202f654993429949fd5c121554c3cf6072239fcf35aa44d45dd1b,
+            });
+            try testArgs(@Vector(4, u255), .{
+                0x5477956be73c0d9af22c5214b47a39761bf0e88c92dc08ad1955b12f60575982,
+                0x4fd80abb62788804ff3edac72d91096e3747a8fe5a53e5f63b0cb4c1ec85a626,
+                0x411f513c4e4dffd0a699f99b3d9aa50c315fccee34d183086b8209f42d965cd4,
+                0x2561cd45d8e7fb3ddd810396823997354e7c2c4c5529d66b30f5a6ef095d92ce,
+            }, .{
+                0x4bb37f557ba14ab84ebc762ec943d39f5250ecb6005935f3269ba60d8df20d61,
+                0x38358daa8c05bc317383942e3a9189d2f205ba705a76f9285acf9f223d954b36,
+                0x3616b6288a23c31fb4412739d002df3d50b19d23995585a43dfcacc547f1eb49,
+                0x00b4ecb3ddfce395458e448c299f74d8f5c37e36a14d9ba5b6bf8dd3917522d7,
+            });
+
             try testArgs(@Vector(1, i256), .{
                 0x1fe30aed39db1accf4d1b43845aec28c1094b500492555fdf59b4f2f85c6a1ce,
             }, .{
@@ -18653,6 +21069,80 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
                 0xe47a122bd45d5e7d69722d864a6b795ddee965a0993094f8791dd309d692de8b,
             });
 
+            try testArgs(@Vector(1, i257), .{
+                -0x037f102b7ce87d2f4c704b7dd9c77c79d5ef99cbaa890cdf5be7f1c991b377f12,
+            }, .{
+                0x0e6613140253b86eb76f9cf0da699c734f1073559d4d59b876727531aa1566a3a,
+            });
+            try testArgs(@Vector(2, i257), .{
+                -0x0516f834d832f5b33a8b766b5830ae9b6ed2a8be3347d7cce6a0d536c0ccdcda3,
+                -0x04148b4556c411a3db079163f1aba615971677b03abf31a34abe73cf054957e01,
+            }, .{
+                0x01de4743d129cde4400547974ca9e9cfe234fe8fa67ec3c00f70b52f16a683ac9,
+                -0x010dfb09c7a42112f07962065751b8bcabe282143d79aaad484080f2c15ac41a1,
+            });
+            try testArgs(@Vector(4, i257), .{
+                -0x097f0ce2c2a4de17cd779503e3e86de1fa9153ca69674546367166703b79658aa,
+                0x0d6414d92755101344039202da1d6ea15e7054817dbcf4f30c16f85eaf48f3a85,
+                0x01e73273f475e7fb3111f8a4212eba3f736c536006f1f1a0fa0656fd3fc34fc66,
+                -0x0277808e445419c1f814213ef86dec08f7a0192ac985dd22043a8161e0f291c42,
+            }, .{
+                0x0a3a6678ee5f9458ea259d8434c1604cbfab67b294525a7b2e6ee5dee752db0d8,
+                0x009f39291d0f97269ce694958d6252a666b928737e645865e38fc70995307290a,
+                -0x0beed4ee766fb1a04a66a0cbad3da0f471b5c0e32c252279b23feddad2877d35c,
+                -0x02ad0e2fa1940ad0d2ba67f8f27b486ec781bf5da1f580a9bba0ba8bb0d11aff5,
+            });
+
+            try testArgs(@Vector(1, u257), .{
+                0x1bb62cd7dbcbbbf2e708871d12f647840997f16f6d322eae96393b3b46ad0ae11,
+            }, .{
+                0x1d4361c83425068c40a7a142019b4004a496cf16649773aa04431225b189fbd68,
+            });
+            try testArgs(@Vector(2, u257), .{
+                0x08459d63d1124e6bf747a2dba45df79ba9813451189f4e9bd8fcae37d92646ef9,
+                0x1336f89e29d7da4d741e10a8a8016e007ad3f475c7b302a03271f0edcb2dbaa98,
+            }, .{
+                0x0fd91b46af0a41227ae191250a1d49b7e44e4435f371eac7e8355b8f3ccff1ccb,
+                0x03914f3814478e96cf3efb4169aa36747aa4bc33daa56ca41134dd71a3af85de6,
+            });
+            try testArgs(@Vector(4, u257), .{
+                0x0aa2d811711d70ec5dd639ecb979dda726c157bdc18dc34447c3026fac49d3909,
+                0x00652c96fa6a34772a424e4b9c7c3613558c79f4144349e0d700c15ff9ec2f974,
+                0x11b10abfe69b4f75c11e0a0ee128526ec9f3fb7b32502d1005984b0c0652ff7c4,
+                0x053ea83a9caded41dd751a742b49b062fe1fd62af3d3025486bc1af7921225ab0,
+            }, .{
+                0x018c2110a0432d0acd462886f559f826bdfb05e91e61e2928a3a43b98d1e6bfab,
+                0x1b212dde794f97203018963e51b025b21a5dd47f04a007fee80aaaadb87e30140,
+                0x1394a84c2431e46862d33dbf0dd0cf23f7ff7f85c0107c04cdeca1c168df5c556,
+                0x060c9e2ba327cde7650bbe329345b4184223d77adda253c5f425531676e863c8b,
+            });
+
+            try testArgs(@Vector(1, i511), .{
+                0x25c69b25440d3059c5c38ba2771252430152afcbcb988d8c5de0832f49f1d8649a17a4e0dd508d8cd7349adc4ba228902099092726af175a8f04f29a19ded5ed,
+            }, .{
+                0x0c28729ed05abb52e888bb7fefe58f783ed5c7ef3c8a4cdd7349fe47edca26db746de0308e642b64b659a52e17405dac9932ec43499e6f17b6cbebcab597e577,
+            });
+            try testArgs(@Vector(2, i511), .{
+                0x2b6235f2d231f63ebc67ac71893fa5ad6f2125b6d50a5f9eedaf8bb4de3e116939ab5b2c0e9b7cbc0a2308c3a5dd4a99049f4538cffd4155b24721e3c77bc268,
+                0x10cac70f8d1dbb88f1dfd913823e8fa53ad58f54929222c1c7bedb591dd3c90ecc5c1239fccc80515b5bbf4c4d47669f267b3880dd8f465f6c7e9fc6e63faac4,
+            }, .{
+                -0x3dc3af786e95767befb16c51f5602029a5fdbc76dafafeab2c409168332f8c5c038a0e7f3d0021acaf6eb6f6ff9a232a9dd19e5b33c7c4158f8f1798150448f1,
+                -0x0eeaa2dc65820153224f26847a99a3626d6ee9991ecba613e721bbd169e69371dd5bcadc8983ae9b82d77376b0e8179997e400fce64c74c9efd2b4a5f174b854,
+            });
+
+            try testArgs(@Vector(1, u511), .{
+                0x207381c6b742f50bf76d0d220943d9354d96f4cf27e979bac6c8f47d70d64d44153dd6c2ed62cc7b5a4fce98600382fbece15ee4e4b3d1c0d4277a553ac01c10,
+            }, .{
+                0x35ad1698d693b3b7618d1243163f1ce2beb5f6c3c7b6e33a24ce9639e5a3a30f78350f4c3c818512377bf89851388e1d444a50b20a10a2ce66c60d0af1bebc84,
+            });
+            try testArgs(@Vector(2, u511), .{
+                0x039131b98944347f35fe54337902bba6b975a1b6bd9e36a20e236f3149b53156b5bfa0b468a56dfb1c09684a8f24b5d548c6e216c20dde01813c044cf031a3eb,
+                0x43ac7c9afa88f5169405ff4963557bb7e78ed15eda5bfa91335f7d9117ee13d969d6cdd2f0910f8865cb57687fd2e0f4e6cb188bb34759609724a7ce128c0db1,
+            }, .{
+                0x43dc8c03e26c12e96d69076a68afd3e0515ffd67fd2b2aeb8457c92e7e2ec6c503a362866ddd99d4a4f21e7bece901e3df76a9496e978d11f4c4cc50d1e52601,
+                0x4f97c33b53b3c4b5a59c257b575149524e7e4ec4fcfe1574a9a3111b066959d39affe87e6e99656a80d64ae95d60ef4f90c2544559d22abbf26d6ab34e5b3074,
+            });
+
             try testArgs(@Vector(1, i512), .{
                 -0x439ba81b44584e0c4d7abc80d18ab9d679a4e921884e877b28d04eb15b2d3e7be8d670b0aba2c4cc25c12655e1899ab514d0a6e50a221bcf076d506e6411d5c2,
             }, .{
@@ -18679,6 +21169,44 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
                 0x8fa3d86099e9e2789d72f8e792290356d659ab20ac0414ff94745984c6ae7d986082197bb849889f912e896670aa2c1a11bd7e66e3f650710b0f0a18a1533f90,
             });
 
+            try testArgs(@Vector(1, i513), .{
+                0x0dd56664962c44dbd9941a8e45102e1e050ef164752b954c4029ce6a28752c97b76ce3b0ae50dd09076fc16c89c628bf82ea7d3250101c3ee1316e8c51a746a4b,
+            }, .{
+                -0x0c4f50a700f8d91c3944c66e6932ea9cf0433a309dd41fd8ec1ab6e7c7f031de17c7fa7bde7a162fd653c1911aeddd176271f5bd76cca68eeab79ffde88835808,
+            });
+            try testArgs(@Vector(2, i513), .{
+                0x0099b42682a76cfc1d6a0b680cd44c907387e78ca92d4c30c555dd6b05f136ad7e136f892641f1b256ef2aa10b1497d1e5a25c9e29260bd861b4fdc1ccc821a4c,
+                -0x03aafdfa35c0b27515ee422ee71afcb157c6b578a77b514a6134e759bd80100f41344d6016a4dc252034667cb7a9b5165c058c5af0a632ed4b9a49d345b54d711,
+            }, .{
+                -0x07c699f50cc1592587bae58fa52742130df1dcd12da8ab1a15d48cbb3c8adeccacd16da37b91ba8a4ffe02669a089e3a1aadf325f161b99a010e76275a11f8dd0,
+                0x0cec94f5064e3d4736016908cee5bd5469c2c60ed22c560a68b5bbc3a912b984195d7a2aa499db9b67779eadf0158ac9e9c166d58d42720834c5cde96d9a22c33,
+            });
+
+            try testArgs(@Vector(1, u513), .{
+                0x167f9eb1095f756f462ceb2a48b7a5230a92f9ca6c572f394d741475cfa791e9666852b7696944f624f938f9474fe64e2189c1a584bdadc70d0f6db5c94355c78,
+            }, .{
+                0x06566ddffb298aee609074e06fbd881774623431f401410416645844a6c95f65cb08e1765f9c80bdf8f6d0d4c8ec9113d96b6e94cf97909d7da8c6165773162d8,
+            });
+            try testArgs(@Vector(2, u513), .{
+                0x0e58f2fad85c025548e8c011faf78307d5237f25c41a319b0ea826704fba3db56f5e1074e6c76c8ecb83004058ff7dc5157d397d93d6725ac604efe0a48e27b8e,
+                0x1635d3b34a3186f0e3c8d6ecdea25c84be4ef1f1bbc503dba90e9a260ffd8ec781b857c28e30fcc108ea93c3afa6acb91de3ad3fdb8e68cacdf412bcd31121c5e,
+            }, .{
+                0x043c3969668c05c0bec64d7be9741790a17588b8fd35ab88b8708c32658acc6e92dfc1691ee41da1278f7abbfc3f92aea885cabc17c556688f0971ca40b2acdef,
+                0x198d4fc313ec8bafc85712c426223460c465a976aade6ca2b21ccb216257519675dda21f0707134920d23479c983e0d8fc75bb5e113f19fa3b4f63a69329cf723,
+            });
+
+            try testArgs(@Vector(1, i1023), .{
+                -0x18ad357e523014b4b2b02d7802e8d6687e0b37e0e20bc992d9a1d3498cdccc3683c62628505026725ccab8e2d7da378de5e3dd539f168530e83b8add890851977a58c640102ebeb7d15f56b024a54636008af9232f73ac4a83f9e502ed1f6cf0647e4d6c2cc6c6e8fc4a49abcb2e34fa927cc114692905d73ffed1aed664eab4,
+            }, .{
+                0x29757ede90f3fd7a77d970667941eee2f7f7df5dc1100562c8e3bd45dcf1cbbbffface90f0b4f2aef49642e1cdeaa19045cc6dcf9f81750bee8e9d84d951da233d16878ae1473d42146660bc454a78a4bc22ebf2916b7f535c4b88302ea0108b458bd38660e95b0ae703d268cff78b39be828918cf6bd9ad16a90d407d3ee5b1,
+            });
+
+            try testArgs(@Vector(1, u1023), .{
+                0x7e22aa4f394329943e1f265df8327c44032b28baea5ce81dfdf9781ed2c9ad337964b57c1ad4cb03cb920035c85e8c6e475ad33742874226beeba62e3e130ff6fbd21e902e49f7f95c7c3b1c6d7ce34a1ed85ba8028b41d19ab9547e05da56e6c8fba7c9c4f949412808ac3fb8709e490a859bebe22a77e704c04ea44a4579a0,
+            }, .{
+                0x4d2ec73e1a38a7373514259ed749a9895a5c45e53498ad3e75690116ec167321c0bcaa4fe86301b486eba831e7c15c3872676afe677d01ad5d088b51d64248d1bce2e191dcb87d6c9f9b944554b5c5a74bb64c7eea50a0badc2f292251b640c97d5b8e9010eb2f034d77b3a7f14aafb76c104b196a4b76073503acb085055209,
+            });
+
             try testArgs(@Vector(1, i1024), .{
                 -0x4fe568569c0531c9bfbbda1516e93a6c61a3d035c98e13fdc85225165a3bea84d5dc6b610ced008f9321453af42ea50bbf6881d40d2759b73b9b6186c0d6d243f367e292cbbf6b5c5c30d7f4e8de19701c7b0fc9e67cdf31228daa1675a4887f6c4f1588b48855d6f4730a21f27dec8a756c568727709b65cd531020d53ff394,
             }, .{
@@ -18690,6 +21218,18 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
             }, .{
                 0xf1e3bbe031d59351770a7a501b6e969b2c00d144f17648db3f944b69dfeb7be72e5ff933a061eba4eaa422f8ca09e5a97d0b0dd740fd4076eba8c72d7a278523f399202dc2d043c4e0eb58a2bcd4066e2146e321810b1ee4d3afdddb4f026bcc7905ce17e033a7727b4e08f33b53c63d8c9f763fc6c31d0523eb38c30d5e40bc,
             });
+
+            try testArgs(@Vector(1, i1025), .{
+                -0x0aac7daecbe81ad0f5b3582238ce842a9e57f580af344429c55785eb8ce32d28658417792d10e5263c6c7d0ab7d8ab6198d78bd024ce9c23de9470b20aa6eaf9dd301034cfee6b22025be5df4e91708d7cc9e980959a449b0cb893355392d1c94c4a4ed67d91108df655383f5f8fcde66f22dbd6453d838c1d160fb80ee07ab18,
+            }, .{
+                -0x0be0a1d4693d8969af6e26aa98ddb82f44124aa292fb336dd90cb5f28d708a33ef2d055db58e32578c5c20bb436a613b8ca214914db5066d458599600ced96129f4894b80293a3975e2bf7fd1a1f396d128ef89fd0609d2e518534e66c5e46c90b0a73e4a807c8a6decba204ac6e11859a492df1c81beec8b04961afa8544e081,
+            });
+
+            try testArgs(@Vector(1, u1025), .{
+                0x129e165d8601a1ef41658e3ab9a7d0993124c46a37a672395a1314d5f8984de3c73e4569f1bd91f28aa8bf3e940d2121ef8bb557023abd80deb6761a7b0e2597763e5b895a52fc32308cc39b34a31f17fd8fe04bd1817e5b4a1046bbc1ee2bd360274e667be4392874a7dd8de7c8c054e3e6919302cb2ad46743798591ad0accb,
+            }, .{
+                0x07ff746b3d7ed091996cb20d21d6e85397c7daa127063a9f30cdb91483b145f2af3aa0bcf58188bc171e97a7b07800ee007af0305fb40e086ed2289dc7c303961d325bb799920a47de27bb16f6a868d80e93769982d81aa56cc3d1dbc87f1138179f0af4f6def885ade090d2725a044b500ef56fe39794906d45330fab9a4f81f,
+            });
         }
         fn testFloatVectors() !void {
             @setEvalBranchQuota(21_700);
@@ -19096,6 +21636,15 @@ test addUnsafe {
     try test_add_unsafe.testFloatVectors();
 }
 
+// Wrapping addition: returns `lhs +% rhs`, with the result typed as `Type`,
+// the same type as both operands.
+inline fn addWrap(comptime Type: type, lhs: Type, rhs: Type) Type {
+    return lhs +% rhs;
+}
+// Runs the integer suites of the `binary` harness against `addWrap`.
+test addWrap {
+    // Build the harness for `addWrap`, passing an empty options literal.
+    const test_add_wrap = binary(addWrap, .{});
+    // Only the integer scalar and integer vector suites are invoked here.
+    try test_add_wrap.testInts();
+    try test_add_wrap.testIntVectors();
+}
+
 inline fn subUnsafe(comptime Type: type, lhs: Type, rhs: Type) AddOneBit(Type) {
     @setRuntimeSafety(false);
     switch (@typeInfo(Scalar(Type))) {
@@ -19116,6 +21665,15 @@ test subUnsafe {
     try test_sub_unsafe.testFloatVectors();
 }
 
+// Wrapping subtraction: returns `lhs -% rhs`, with the result typed as `Type`,
+// the same type as both operands.
+inline fn subWrap(comptime Type: type, lhs: Type, rhs: Type) Type {
+    return lhs -% rhs;
+}
+// Runs the integer suites of the `binary` harness against `subWrap`.
+test subWrap {
+    // Build the harness for `subWrap`, passing an empty options literal.
+    const test_sub_wrap = binary(subWrap, .{});
+    // Only the integer scalar and integer vector suites are invoked here.
+    try test_sub_wrap.testInts();
+    try test_sub_wrap.testIntVectors();
+}
+
 inline fn mulUnsafe(comptime Type: type, lhs: Type, rhs: Type) DoubleBits(Type) {
     @setRuntimeSafety(false);
     return @as(DoubleBits(Type), lhs) * rhs;
test/behavior/x86_64.zig
@@ -3,6 +3,9 @@
 test {
     const builtin = @import("builtin");
     if (builtin.zig_backend != .stage2_x86_64) return error.SkipZigTest;
+    // MachO linker does not support executables this big.
+    if (builtin.object_format == .macho) return error.SkipZigTest;
+    // COFF linker does not support the new backend.
     if (builtin.object_format == .coff) return error.SkipZigTest;
     _ = @import("x86_64/math.zig");
     _ = @import("x86_64/mem.zig");