Commit 39119088f9

Jacob Young <jacobly0@users.noreply.github.com>
2025-02-03 07:10:34
x86_64: rewrite vector `@truncate`
1 parent c58e60a
Changed files (3)
src
test
behavior
x86_64
src/arch/x86_64/CodeGen.zig
@@ -2414,7 +2414,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
 }
 
 fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
-    @setEvalBranchQuota(4_600);
+    @setEvalBranchQuota(5_500);
     const pt = cg.pt;
     const zcu = pt.zcu;
     const ip = &zcu.intern_pool;
@@ -21970,9 +21970,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_size), ._, ._ },
-                        .{ .@"0:", .v_ps, .mova, .tmp1y, .memsia(.src0y, .@"2", .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_, .cvtps2ph, .memia(.dst0x, .tmp0, .add_size), .tmp1y, .rm(.{}), ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .v_ps, .mova, .tmp1y, .memsia(.src0y, .@"2", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_, .cvtps2ph, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1y, .rm(.{}), ._ },
                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -22355,9 +22355,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_size), ._, ._ },
-                        .{ .@"0:", .v_ps, .cvtpd2, .tmp1x, .memsia(.src0y, .@"2", .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_ps, .mova, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .v_ps, .cvtpd2, .tmp1x, .memsia(.src0y, .@"2", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -22382,9 +22382,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_size), ._, ._ },
-                        .{ .@"0:", ._ps, .cvtpd2, .tmp1x, .memsia(.src0x, .@"2", .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._ps, .movl, .memia(.dst0q, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._ps, .cvtpd2, .tmp1x, .memsia(.src0x, .@"2", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._ps, .movl, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -23292,9 +23292,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .v_ps, .cvtph2, .tmp1y, .memia(.src0x, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_ps, .mova, .memsia(.dst0y, .@"2", .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .v_ps, .cvtph2, .tmp1y, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_ps, .mova, .memsia(.dst0y, .@"2", .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -23500,13 +23500,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .v_ps, .cvtph2, .tmp1y, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .v_ps, .cvtph2, .tmp1y, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
                         .{ ._, .v_pd, .cvtps2, .tmp2y, .tmp1x, ._, ._ },
                         .{ ._, .v_f128, .extract, .tmp1x, .tmp1y, .ui(1), ._ },
-                        .{ ._, .v_pd, .mova, .memsia(.dst0y, .@"4", .tmp0, .add_size), .tmp2y, ._, ._ },
+                        .{ ._, .v_pd, .mova, .memsia(.dst0y, .@"4", .tmp0, .add_unaligned_size), .tmp2y, ._, ._ },
                         .{ ._, .v_pd, .cvtps2, .tmp2y, .tmp1x, ._, ._ },
-                        .{ ._, .v_pd, .mova, .memsiad(.dst0y, .@"4", .tmp0, .add_size, 32), .tmp2y, ._, ._ },
+                        .{ ._, .v_pd, .mova, .memsiad(.dst0y, .@"4", .tmp0, .add_unaligned_size, 32), .tmp2y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -23975,9 +23975,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .v_pd, .cvtps2, .tmp1y, .memia(.src0x, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_pd, .mova, .memsia(.dst0y, .@"2", .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .v_pd, .cvtps2, .tmp1y, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_pd, .mova, .memsia(.dst0y, .@"2", .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -24002,9 +24002,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", ._pd, .cvtps2, .tmp1x, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._pd, .mova, .memsia(.dst0x, .@"2", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._pd, .cvtps2, .tmp1x, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._pd, .mova, .memsia(.dst0x, .@"2", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -24077,6 +24077,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", .f_, .ld, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._, ._ },
@@ -24237,6 +24238,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", .f_, .ld, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._, ._ },
@@ -24724,6 +24726,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_unaligned_size), ._, ._ },
@@ -24749,6 +24752,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_unaligned_size), ._, ._ },
@@ -24847,6 +24851,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ },
@@ -24872,6 +24877,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ },
@@ -24898,6 +24904,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ },
@@ -24923,6 +24930,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ },
@@ -24949,6 +24957,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
@@ -24976,6 +24985,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
@@ -25089,6 +25099,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"2", .tmp0, .add_unaligned_size), ._, ._ },
@@ -25114,6 +25125,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ },
@@ -25139,6 +25151,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ },
@@ -25164,6 +25177,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
@@ -25235,6 +25249,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"2", .tmp0, .add_unaligned_size), ._, ._ },
@@ -25260,6 +25275,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ },
@@ -25285,6 +25301,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ },
@@ -25310,6 +25327,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
@@ -25358,6 +25376,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
                         .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
@@ -26157,7 +26176,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .word } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -26171,10 +26190,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_w, .movsxb, .tmp1y, .memia(.src0x, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"2", .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_w, .movsxb, .tmp1y, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"2", .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -26183,7 +26203,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .yword, .is = .word } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -26197,10 +26217,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_w, .movzxb, .tmp1y, .memia(.src0x, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"2", .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_w, .movzxb, .tmp1y, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"2", .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -26209,7 +26230,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -26223,10 +26244,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_w, .movsxb, .tmp1x, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_w, .movsxb, .tmp1x, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -26235,7 +26257,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .xword, .is = .word } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -26249,10 +26271,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_w, .movzxb, .tmp1x, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_w, .movzxb, .tmp1x, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -26261,7 +26284,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -26275,10 +26298,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .p_w, .movsxb, .tmp1x, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .p_w, .movsxb, .tmp1x, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -26287,7 +26311,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .xword, .is = .word } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -26301,10 +26325,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .p_w, .movzxb, .tmp1x, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .p_w, .movzxb, .tmp1x, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -26327,6 +26352,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ },
@@ -26352,6 +26378,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ },
@@ -26378,6 +26405,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ },
@@ -26403,6 +26431,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ },
@@ -26537,7 +26566,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .dword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -26551,10 +26580,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_d, .movsxb, .tmp1y, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"4", .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_d, .movsxb, .tmp1y, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"4", .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -26563,7 +26593,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .yword, .is = .dword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -26577,10 +26607,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_d, .movzxb, .tmp1y, .memia(.src0x, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"4", .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_d, .movzxb, .tmp1y, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"4", .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -26589,7 +26620,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -26603,10 +26634,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_d, .movsxb, .tmp1x, .memia(.src0d, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"4", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_d, .movsxb, .tmp1x, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"4", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -26615,7 +26647,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .xword, .is = .dword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -26629,10 +26661,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_d, .movzxb, .tmp1x, .memia(.src0d, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"4", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_d, .movzxb, .tmp1x, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"4", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -26641,7 +26674,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -26655,10 +26688,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .p_d, .movsxb, .tmp1x, .memia(.src0d, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"4", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .p_d, .movsxb, .tmp1x, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"4", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -26667,7 +26701,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .xword, .is = .dword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -26681,10 +26715,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .p_d, .movzxb, .tmp1x, .memia(.src0d, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"4", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .p_d, .movzxb, .tmp1x, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"4", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -26707,6 +26742,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ },
@@ -26732,6 +26768,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ },
@@ -26758,6 +26795,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ },
@@ -26783,6 +26821,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ },
@@ -26920,7 +26959,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -26934,10 +26973,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_q, .movsxb, .tmp1y, .memia(.src0d, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"8", .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_q, .movsxb, .tmp1y, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"8", .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -26946,7 +26986,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .yword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -26960,10 +27000,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_q, .movzxb, .tmp1y, .memia(.src0d, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"8", .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_q, .movzxb, .tmp1y, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"8", .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -26972,7 +27013,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .word, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -26986,10 +27027,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_q, .movsxb, .tmp1x, .memia(.src0w, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"8", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_q, .movsxb, .tmp1x, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"8", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -26998,7 +27040,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -27012,10 +27054,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_q, .movzxb, .tmp1x, .memia(.src0w, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"8", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_q, .movzxb, .tmp1x, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"8", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -27024,7 +27067,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .word, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -27038,10 +27081,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .p_q, .movsxb, .tmp1x, .memia(.src0w, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"8", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .p_q, .movsxb, .tmp1x, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"8", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -27050,7 +27094,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .byte } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -27064,10 +27108,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .p_q, .movzxb, .tmp1x, .memia(.src0w, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"8", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .p_q, .movzxb, .tmp1x, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"8", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -27090,6 +27135,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movsx, .tmp1q, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ },
@@ -27116,6 +27162,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movsx, .tmp1q, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ },
@@ -27142,6 +27189,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ },
@@ -27167,6 +27215,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ },
@@ -27301,6 +27350,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp1q, .mem(.dst0), ._, ._ },
@@ -27331,6 +27381,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp1q, .mem(.dst0), ._, ._ },
@@ -27361,6 +27412,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp1q, .mem(.dst0), ._, ._ },
@@ -27391,6 +27443,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp1q, .mem(.dst0), ._, ._ },
@@ -27526,7 +27579,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .dword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -27540,10 +27593,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_d, .movsxw, .tmp1y, .memia(.src0x, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"2", .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_d, .movsxw, .tmp1y, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"2", .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -27552,7 +27606,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .word } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .yword, .is = .dword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -27566,10 +27620,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_d, .movzxw, .tmp1y, .memia(.src0x, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"2", .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_d, .movzxw, .tmp1y, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"2", .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -27578,7 +27633,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .word } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -27592,10 +27647,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_d, .movsxw, .tmp1x, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_d, .movsxw, .tmp1x, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -27604,7 +27660,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .word } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .xword, .is = .dword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -27618,10 +27674,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_d, .movzxw, .tmp1x, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_d, .movzxw, .tmp1x, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -27630,7 +27687,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .word } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -27644,10 +27701,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .p_d, .movsxw, .tmp1x, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .p_d, .movsxw, .tmp1x, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -27656,7 +27714,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .word } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .xword, .is = .dword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -27670,10 +27728,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .p_d, .movzxw, .tmp1x, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .p_d, .movzxw, .tmp1x, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -27695,6 +27754,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ },
@@ -27720,6 +27780,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ },
@@ -27854,7 +27915,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .word } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -27868,10 +27929,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_q, .movsxw, .tmp1y, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"4", .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_q, .movsxw, .tmp1y, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"4", .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -27880,7 +27942,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .word } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .yword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -27894,10 +27956,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_q, .movzxw, .tmp1y, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"4", .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_q, .movzxw, .tmp1y, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"4", .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -27906,7 +27969,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .word } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -27920,10 +27983,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_q, .movsxw, .tmp1x, .memia(.src0d, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"4", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_q, .movsxw, .tmp1x, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"4", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -27932,7 +27996,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .word } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -27946,10 +28010,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_q, .movzxw, .tmp1x, .memia(.src0d, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"4", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_q, .movzxw, .tmp1x, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"4", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -27958,7 +28023,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .word } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -27972,10 +28037,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .p_q, .movsxw, .tmp1x, .memia(.src0d, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"4", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .p_q, .movsxw, .tmp1x, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"4", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -27984,7 +28050,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .word } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -27998,10 +28064,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .p_q, .movzxw, .tmp1x, .memia(.src0d, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"4", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .p_q, .movzxw, .tmp1x, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"4", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -28024,6 +28091,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movsx, .tmp1q, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ },
@@ -28050,6 +28118,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ },
@@ -28181,6 +28250,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp1q, .mem(.dst0), ._, ._ },
@@ -28211,6 +28281,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp1q, .mem(.dst0), ._, ._ },
@@ -28346,7 +28417,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -28360,10 +28431,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_q, .movsxd, .tmp1y, .memia(.src0x, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"2", .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_q, .movsxd, .tmp1y, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"2", .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -28372,7 +28444,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .dword } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .yword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -28386,10 +28458,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_q, .movzxd, .tmp1y, .memia(.src0x, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"2", .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_q, .movzxd, .tmp1y, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"2", .tmp0, .add_unaligned_size), .tmp1y, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -28398,7 +28471,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .dword } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -28412,10 +28485,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_q, .movsxd, .tmp1x, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_q, .movsxd, .tmp1x, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -28424,7 +28498,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .word } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -28438,10 +28512,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .vp_q, .movzxd, .tmp1x, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_q, .movzxd, .tmp1x, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -28450,7 +28525,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .dword } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -28464,10 +28539,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .p_q, .movsxd, .tmp1x, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .p_q, .movsxd, .tmp1x, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -28476,7 +28552,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .dword } }, .any },
                     .dst_constraints = .{.{ .multiple_scalar_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
                         .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
@@ -28490,10 +28566,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
-                        .{ .@"0:", .p_q, .movzxd, .tmp1x, .memia(.src0q, .tmp0, .add_size), ._, ._ },
-                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .p_q, .movzxd, .tmp1x, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memsia(.dst0x, .@"2", .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
                         .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
                         .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
@@ -28516,6 +28593,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .movsxd, .tmp1q, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ },
@@ -28542,6 +28620,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ .@"0:", ._, .mov, .tmp1d, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ },
@@ -28670,6 +28749,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp1q, .mem(.dst0), ._, ._ },
@@ -28700,6 +28780,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
                         .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ },
                         .{ ._, ._, .lea, .tmp1q, .mem(.dst0), ._, ._ },
@@ -28714,7 +28795,5049 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 }, .{
                     .required_features = .{ .@"64bit", .slow_incdec, null, null },
                     .src_constraints = .{ .any_scalar_signed_int, .any },
-                    .dst_constraints = .{.any_scalar_signed_int},
+                    .dst_constraints = .{.any_scalar_signed_int},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
+                        .{ .type = .i64, .kind = .{ .reg = .rax } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp3d, .sia(-1, .src0, .add_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._sq, .lod, ._, ._, ._, ._ },
+                        .{ ._, ._sq, .sto, ._, ._, ._, ._ },
+                        .{ ._, ._r, .sa, .tmp4q, .ui(63), ._, ._ },
+                        .{ ._, ._, .mov, .tmp3d, .sa2(.dst0, .src0, .add_delta_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .sto, ._, ._, ._, ._ },
+                        .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
+                        .{ ._, ._a, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_scalar_signed_int, .any },
+                    .dst_constraints = .{.any_scalar_signed_int},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
+                        .{ .type = .i64, .kind = .{ .reg = .rax } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp3d, .sia(-1, .src0, .add_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._sq, .lod, ._, ._, ._, ._ },
+                        .{ ._, ._sq, .sto, ._, ._, ._, ._ },
+                        .{ ._, ._r, .sa, .tmp4q, .ui(63), ._, ._ },
+                        .{ ._, ._, .mov, .tmp3d, .sa2(.dst0, .src0, .add_delta_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .sto, ._, ._, ._, ._ },
+                        .{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .slow_incdec, null, null },
+                    .src_constraints = .{ .any_scalar_int, .any },
+                    .dst_constraints = .{.any_scalar_int},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
+                        .{ .type = .u64, .kind = .{ .reg = .rax } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp3d, .sa(.src0, .add_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
+                        .{ ._, ._, .mov, .tmp3d, .sa2(.dst0, .src0, .add_delta_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .sto, ._, ._, ._, ._ },
+                        .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
+                        .{ ._, ._a, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_scalar_int, .any },
+                    .dst_constraints = .{.any_scalar_int},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
+                        .{ .type = .u64, .kind = .{ .reg = .rax } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp3d, .sa(.src0, .add_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
+                        .{ ._, ._, .mov, .tmp3d, .sa2(.dst0, .src0, .add_delta_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .sto, ._, ._, ._, ._ },
+                        .{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                } }) catch |err| switch (err) {
+                    error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
+                        @tagName(air_tag),
+                        dst_ty.fmt(pt),
+                        src_ty.fmt(pt),
+                        ops[0].tracking(cg),
+                    }),
+                    else => |e| return e,
+                };
+                try res[0].finish(inst, &.{ty_op.operand}, &ops, cg);
+            },
+            .trunc => |air_tag| if (use_old) try cg.airTrunc(inst) else {
+                const ty_op = air_datas[@intFromEnum(inst)].ty_op;
+                var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
+                var res: [1]Temp = undefined;
+                cg.select(&res, &.{ty_op.ty.toType()}, &ops, comptime &.{ .{
+                    .src_constraints = .{ .{ .signed_int = .gpr }, .any },
+                    .dst_constraints = .{.{ .exact_signed_int = 1 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .none } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .@"and", .dst0d, .si(1), ._, ._ },
+                        .{ ._, ._, .neg, .dst0d, ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_signed_int, .any },
+                    .dst_constraints = .{.{ .exact_signed_int = 1 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .dst0d, .mem(.src0b), ._, ._ },
+                        .{ ._, ._, .@"and", .dst0d, .si(1), ._, ._ },
+                        .{ ._, ._, .neg, .dst0d, ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .int = .gpr }, .any },
+                    .dst_constraints = .{.{ .exact_int = 8 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .none } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{} },
+                }, .{
+                    .src_constraints = .{ .any_signed_int, .any },
+                    .dst_constraints = .{.{ .exact_signed_int = 8 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movsx, .dst0d, .mem(.src0b), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_unsigned_int, .any },
+                    .dst_constraints = .{.{ .exact_unsigned_int = 8 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .dst0d, .mem(.src0b), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .gpr }, .any },
+                    .dst_constraints = .{.{ .signed_int = .byte }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .none } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._l, .sa, .dst0b, .uia(8, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._r, .sa, .dst0b, .uia(8, .dst0, .sub_bit_size), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .gpr }, .any },
+                    .dst_constraints = .{.{ .unsigned_int = .byte }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .none } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .@"and", .dst0b, .sa(.dst0, .add_umax), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_int, .any },
+                    .dst_constraints = .{.{ .unsigned_int = .byte }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .dst0d, .mem(.src0b), ._, ._ },
+                        .{ ._, ._, .@"and", .dst0b, .sa(.dst0, .add_umax), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .int = .gpr }, .any },
+                    .dst_constraints = .{.{ .exact_int = 16 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .none } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{} },
+                }, .{
+                    .src_constraints = .{ .any_signed_int, .any },
+                    .dst_constraints = .{.{ .exact_signed_int = 16 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movsx, .dst0d, .mem(.src0w), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_unsigned_int, .any },
+                    .dst_constraints = .{.{ .exact_unsigned_int = 16 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .dst0d, .mem(.src0w), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .fast_imm16, null, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .gpr }, .any },
+                    .dst_constraints = .{.{ .unsigned_int = .word }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .none } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .@"and", .dst0w, .sa(.dst0, .add_umax), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .fast_imm16, null, null, null },
+                    .src_constraints = .{ .any_unsigned_int, .any },
+                    .dst_constraints = .{.{ .unsigned_int = .word }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .dst0d, .mem(.src0w), ._, ._ },
+                        .{ ._, ._, .@"and", .dst0w, .sa(.dst0, .add_umax), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .int = .gpr }, .any },
+                    .dst_constraints = .{.{ .exact_int = 32 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .none } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{} },
+                }, .{
+                    .src_constraints = .{ .any_int, .any },
+                    .dst_constraints = .{.{ .exact_int = 32 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .dst0d, .mem(.src0d), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .gpr }, .any },
+                    .dst_constraints = .{.{ .signed_int = .dword }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .none } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._l, .sa, .dst0d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._r, .sa, .dst0d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_signed_int, .any },
+                    .dst_constraints = .{.{ .signed_int = .dword }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .dst0d, .mem(.src0d), ._, ._ },
+                        .{ ._, ._l, .sa, .dst0d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._r, .sa, .dst0d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .gpr }, .any },
+                    .dst_constraints = .{.{ .unsigned_int = .dword }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .none } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .@"and", .dst0d, .sa(.dst0, .add_umax), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_unsigned_int, .any },
+                    .dst_constraints = .{.{ .unsigned_int = .dword }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .dst0d, .mem(.src0d), ._, ._ },
+                        .{ ._, ._, .@"and", .dst0d, .sa(.dst0, .add_umax), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_int, .any },
+                    .dst_constraints = .{.{ .exact_int = 64 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .dst0q, .mem(.src0q), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .signed_int = .gpr }, .any },
+                    .dst_constraints = .{.{ .signed_int = .qword }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .none } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._l, .sa, .dst0q, .uia(64, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._r, .sa, .dst0q, .uia(64, .dst0, .sub_bit_size), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_signed_int, .any },
+                    .dst_constraints = .{.{ .signed_int = .qword }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .dst0q, .mem(.src0q), ._, ._ },
+                        .{ ._, ._l, .sa, .dst0q, .uia(64, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._r, .sa, .dst0q, .uia(64, .dst0, .sub_bit_size), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .bmi2, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .gpr }, .any },
+                    .dst_constraints = .{.{ .unsigned_int = .qword }},
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_gpr, .none } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .dst0d, .sa(.dst0, .add_bit_size), ._, ._ },
+                        .{ ._, ._, .bzhi, .dst0q, .src0q, .dst0q, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .bmi2, null, null },
+                    .src_constraints = .{ .any_unsigned_int, .any },
+                    .dst_constraints = .{.{ .unsigned_int = .qword }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .dst0d, .sa(.dst0, .add_bit_size), ._, ._ },
+                        .{ ._, ._, .bzhi, .dst0q, .mem(.src0q), .dst0q, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .gpr }, .any },
+                    .dst_constraints = .{.{ .unsigned_int = .qword }},
+                    .patterns = &.{
+                        .{ .src = .{ .mem, .none } },
+                        .{ .src = .{ .to_gpr, .none } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .dst0q, .ua(.dst0, .add_umax), ._, ._ },
+                        .{ ._, ._, .@"and", .dst0q, .src0q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_unsigned_int, .any },
+                    .dst_constraints = .{.{ .unsigned_int = .qword }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .dst0q, .ua(.dst0, .add_umax), ._, ._ },
+                        .{ ._, ._, .@"and", .dst0q, .mem(.src0q), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_int, .any },
+                    .dst_constraints = .{.{ .exact_remainder_int = .{ .of = .xword, .is = .xword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sa(.dst0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_signed_int, .any },
+                    .dst_constraints = .{.{ .exact_remainder_signed_int = .{ .of = .xword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sia(-2, .dst0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0q, .memad(.src0q, .add_size, -16), ._, ._ },
+                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ },
+                        .{ ._, ._r, .sa, .tmp0q, .ui(63), ._, ._ },
+                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_signed_int, .any },
+                    .dst_constraints = .{.{ .remainder_signed_int = .{ .of = .xword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sia(-2, .dst0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0q, .memad(.src0q, .add_size, -16), ._, ._ },
+                        .{ ._, ._l, .sa, .tmp0q, .uia(64, .dst0, .sub_bit_size_rem_64), ._, ._ },
+                        .{ ._, ._r, .sa, .tmp0q, .uia(64, .dst0, .sub_bit_size_rem_64), ._, ._ },
+                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ },
+                        .{ ._, ._r, .sa, .tmp0q, .ui(63), ._, ._ },
+                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_signed_int, .any },
+                    .dst_constraints = .{.{ .remainder_signed_int = .{ .of = .xword, .is = .xword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sia(-1, .dst0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0q, .memad(.src0q, .add_size, -8), ._, ._ },
+                        .{ ._, ._l, .sa, .tmp0q, .uia(64, .dst0, .sub_bit_size_rem_64), ._, ._ },
+                        .{ ._, ._r, .sa, .tmp0q, .uia(64, .dst0, .sub_bit_size_rem_64), ._, ._ },
+                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_unsigned_int, .any },
+                    .dst_constraints = .{.{ .exact_remainder_unsigned_int = .{ .of = .xword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sia(-1, .dst0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .bmi2, null, null },
+                    .src_constraints = .{ .any_unsigned_int, .any },
+                    .dst_constraints = .{.{ .remainder_unsigned_int = .{ .of = .xword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sia(-2, .dst0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sa(.dst0, .add_bit_size_rem_64), ._, ._ },
+                        .{ ._, ._, .bzhi, .tmp2q, .memad(.src0q, .add_size, -16), .tmp2q, ._ },
+                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16), .tmp2q, ._, ._ },
+                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .bmi2, null, null },
+                    .src_constraints = .{ .any_unsigned_int, .any },
+                    .dst_constraints = .{.{ .remainder_unsigned_int = .{ .of = .xword, .is = .xword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sia(-1, .dst0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sa(.dst0, .add_bit_size_rem_64), ._, ._ },
+                        .{ ._, ._, .bzhi, .tmp2q, .memad(.src0q, .add_size, -8), .tmp2q, ._ },
+                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp2q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_unsigned_int, .any },
+                    .dst_constraints = .{.{ .remainder_unsigned_int = .{ .of = .xword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sia(-2, .dst0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0q, .ua(.dst0, .add_umax), ._, ._ },
+                        .{ ._, ._, .@"and", .tmp0q, .memad(.src0q, .add_size, -16), ._, ._ },
+                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ },
+                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_unsigned_int, .any },
+                    .dst_constraints = .{.{ .remainder_unsigned_int = .{ .of = .xword, .is = .xword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sia(-1, .dst0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp0q, .ua(.dst0, .add_umax), ._, ._ },
+                        .{ ._, ._, .@"and", .tmp0q, .memad(.src0q, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } }, .any },
+                    .dst_constraints = .{.{ .scalar_signed_int = .{ .of = .xword, .is = .byte } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp2), ._, ._ },
+                        .{ ._, .vp_b, .add, .dst0x, .dst0x, .lea(.tmp0x), ._ },
+                        .{ ._, .vp_, .xor, .dst0x, .dst0x, .lea(.tmp0x), ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } }, .any },
+                    .dst_constraints = .{.{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } }, .any },
+                    .dst_constraints = .{.{ .scalar_signed_int = .{ .of = .xword, .is = .byte } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp2), ._, ._ },
+                        .{ ._, .p_b, .add, .dst0x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, .p_, .xor, .dst0x, .lea(.tmp0x), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } }, .any },
+                    .dst_constraints = .{.{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse, null, null, null },
+                    .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } }, .any },
+                    .dst_constraints = .{.{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, ._ps, .@"and", .dst0x, .lea(.tmp0x), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .yword, .is = .byte } }, .any },
+                    .dst_constraints = .{.{ .scalar_signed_int = .{ .of = .yword, .is = .byte } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp2), ._, ._ },
+                        .{ ._, .vp_b, .add, .dst0y, .dst0y, .lea(.tmp0y), ._ },
+                        .{ ._, .vp_, .xor, .dst0y, .dst0y, .lea(.tmp0y), ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .yword, .is = .byte } }, .any },
+                    .dst_constraints = .{.{ .scalar_unsigned_int = .{ .of = .yword, .is = .byte } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .byte } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .byte } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_32_i8, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_32_i8, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_32_i8, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp4), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp1y, .lea(.tmp0y), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp5), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp2y, .lea(.tmp0y), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_, .@"and", .tmp3y, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._ },
+                        .{ ._, .vp_b, .add, .tmp3y, .tmp3y, .tmp2y, ._ },
+                        .{ ._, .vp_, .xor, .tmp3y, .tmp3y, .tmp2y, ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp3y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .byte } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .byte } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_32_u8, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_32_u8, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp1y, .lea(.tmp0y), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_, .@"and", .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp2y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp4), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp5), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_, .@"and", .tmp3x, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._ },
+                        .{ ._, .vp_b, .add, .tmp3x, .tmp3x, .tmp2x, ._ },
+                        .{ ._, .vp_, .xor, .tmp3x, .tmp3x, .tmp2x, ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp3x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_, .@"and", .tmp2x, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp4), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp5), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp3x, .tmp1x, ._, ._ },
+                        .{ ._, .p_, .@"and", .tmp3x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .p_b, .add, .tmp3x, .tmp2x, ._, ._ },
+                        .{ ._, .p_, .xor, .tmp3x, .tmp2x, ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp3x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp2x, .tmp1x, ._, ._ },
+                        .{ ._, .p_, .@"and", .tmp2x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+                        .{ ._, ._ps, .mova, .tmp1x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._ps, .mova, .tmp2x, .tmp1x, ._, ._ },
+                        .{ ._, ._ps, .@"and", .tmp2x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_exact_signed_int = .{ .of = .byte, .is = 1 } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .@"and", .tmp1b, .si(1), ._, ._ },
+                        .{ ._, ._, .neg, .tmp1b, ._, ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } }, .any }, // no required features; source: signed int elements, byte-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_exact_signed_int = .{ .of = .byte, .is = 1 } }}, // destination: signed int elements of exactly 1 bit (same caveat)
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the loop's arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: sign-extending load of the source byte at the current index
+                        .{ ._, ._, .@"and", .tmp1b, .si(1), ._, ._ }, // keep only bit 0
+                        .{ ._, ._, .neg, .tmp1b, ._, ._, ._ }, // 0 stays 0, 1 becomes 0xff (-1): the kept bit is replicated across the byte
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store one result byte at the same index
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // inc: index += 1 (inc does not update carry, so the zero flag is tested next)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // jnz: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null }, // only for targets with the slow_incdec feature; the loop below steps its index with add instead of inc
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } }, .any }, // source: signed int elements, byte-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } }}, // destination: signed int elements, byte-sized (same caveat)
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the loop's arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: sign-extending load of the source byte at the current index
+                        .{ ._, ._l, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // sal by an immediate built from 8 and the dst bit size (presumably 8 - bits): discards the unwanted high bits
+                        .{ ._, ._r, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // sar by the same immediate: sign-extends the kept bits back across the byte
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store one result byte at the same index
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // index += 1; carry is set exactly when the index wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jnc: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } }, .any }, // no required features; source: signed int elements, byte-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } }}, // destination: signed int elements, byte-sized (same caveat)
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the loop's arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: sign-extending load of the source byte at the current index
+                        .{ ._, ._l, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // sal by an immediate built from 8 and the dst bit size (presumably 8 - bits): discards the unwanted high bits
+                        .{ ._, ._r, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // sar by the same immediate: sign-extends the kept bits back across the byte
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store one result byte at the same index
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // inc: index += 1 (inc does not update carry, so the zero flag is tested next)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // jnz: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null }, // only for targets with the slow_incdec feature; the loop below steps its index with add instead of inc
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }, .any }, // source: unsigned int elements, byte-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }}, // destination: unsigned int elements, byte-sized (same caveat)
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the loop's arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: zero-extending load of the source byte at the current index
+                        .{ ._, ._, .@"and", .tmp1b, .sa(.dst0, .add_umax), ._, ._ }, // mask with an immediate derived from dst (presumably the dst element's unsigned max - confirm)
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store one result byte at the same index
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // index += 1; carry is set exactly when the index wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jnc: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }, .any }, // no required features; source: unsigned int elements, byte-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }}, // destination: unsigned int elements, byte-sized (same caveat)
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the loop's arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: zero-extending load of the source byte at the current index
+                        .{ ._, ._, .@"and", .tmp1b, .sa(.dst0, .add_umax), ._, ._ }, // mask with an immediate derived from dst (presumably the dst element's unsigned max - confirm)
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store one result byte at the same index
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // inc: index += 1 (inc does not update carry, so the zero flag is tested next)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // jnz: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null }, // only for targets with the slow_incdec feature; the loop below steps its index with add instead of inc
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any }, // source: int elements of either signedness, word-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .byte, .is = 8 } }}, // destination: int elements of exactly 8 bits, so no masking or sign fix-up is needed
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the index arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0b, .@"2", .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: zero-extending load of the low byte of the source element (index scaled by 2)
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store that byte unchanged at the unscaled index
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // index += 1; carry is set exactly when the index wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jnc: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any }, // no required features; source: int elements of either signedness, word-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .byte, .is = 8 } }}, // destination: int elements of exactly 8 bits, so no masking or sign fix-up is needed
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the index arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0b, .@"2", .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: zero-extending load of the low byte of the source element (index scaled by 2)
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store that byte unchanged at the unscaled index
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // inc: index += 1 (inc does not update carry, so the zero flag is tested next)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // jnz: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null }, // only for targets with the slow_incdec feature; the loop below steps its index with add instead of inc
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } }, .any }, // source: signed int elements, word-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } }}, // destination: signed int elements, byte-sized (same caveat)
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the loop's arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memsia(.src0b, .@"2", .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: sign-extending load of the low byte of the source element (index scaled by 2)
+                        .{ ._, ._l, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // sal by an immediate built from 8 and the dst bit size (presumably 8 - bits): discards the unwanted high bits
+                        .{ ._, ._r, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // sar by the same immediate: sign-extends the kept bits back across the byte
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store one result byte at the unscaled index
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // index += 1; carry is set exactly when the index wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jnc: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } }, .any }, // no required features; source: signed int elements, word-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } }}, // destination: signed int elements, byte-sized (same caveat)
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the loop's arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memsia(.src0b, .@"2", .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: sign-extending load of the low byte of the source element (index scaled by 2)
+                        .{ ._, ._l, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // sal by an immediate built from 8 and the dst bit size (presumably 8 - bits): discards the unwanted high bits
+                        .{ ._, ._r, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // sar by the same immediate: sign-extends the kept bits back across the byte
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store one result byte at the unscaled index
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // inc: index += 1 (inc does not update carry, so the zero flag is tested next)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // jnz: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null }, // only for targets with the slow_incdec feature; the loop below steps its index with add instead of inc
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }, .any }, // source: unsigned int elements, word-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }}, // destination: unsigned int elements, byte-sized (same caveat)
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the loop's arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0b, .@"2", .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: zero-extending load of the low byte of the source element (index scaled by 2)
+                        .{ ._, ._, .@"and", .tmp1b, .sa(.dst0, .add_umax), ._, ._ }, // mask with an immediate derived from dst (presumably the dst element's unsigned max - confirm)
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store one result byte at the unscaled index
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // index += 1; carry is set exactly when the index wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jnc: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }, .any }, // no required features; source: unsigned int elements, word-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }}, // destination: unsigned int elements, byte-sized (same caveat)
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the loop's arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0b, .@"2", .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: zero-extending load of the low byte of the source element (index scaled by 2)
+                        .{ ._, ._, .@"and", .tmp1b, .sa(.dst0, .add_umax), ._, ._ }, // mask with an immediate derived from dst (presumably the dst element's unsigned max - confirm)
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store one result byte at the unscaled index
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // inc: index += 1 (inc does not update carry, so the zero flag is tested next)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // jnz: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null }, // only for targets with the slow_incdec feature; the loop below steps its index with add instead of inc
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any }, // source: int elements of either signedness, dword-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .byte, .is = 8 } }}, // destination: int elements of exactly 8 bits, so no masking or sign fix-up is needed
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the index arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: load the dword source element (index scaled by 4)
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store only its low byte at the unscaled index
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // index += 1; carry is set exactly when the index wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jnc: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any }, // no required features; source: int elements of either signedness, dword-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .byte, .is = 8 } }}, // destination: int elements of exactly 8 bits, so no masking or sign fix-up is needed
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the index arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: load the dword source element (index scaled by 4)
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store only its low byte at the unscaled index
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // inc: index += 1 (inc does not update carry, so the zero flag is tested next)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // jnz: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null }, // only for targets with the slow_incdec feature; the loop below steps its index with add instead of inc
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }, .any }, // source: signed int elements, dword-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } }}, // destination: signed int elements, byte-sized (same caveat)
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the loop's arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: load the dword source element (index scaled by 4)
+                        .{ ._, ._l, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // sal on the low byte by an immediate built from 8 and the dst bit size (presumably 8 - bits): discards the unwanted high bits
+                        .{ ._, ._r, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // sar by the same immediate: sign-extends the kept bits back across the byte
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store one result byte at the unscaled index
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // index += 1; carry is set exactly when the index wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jnc: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }, .any }, // no required features; source: signed int elements, dword-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } }}, // destination: signed int elements, byte-sized (same caveat)
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the loop's arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: load the dword source element (index scaled by 4)
+                        .{ ._, ._l, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // sal on the low byte by an immediate built from 8 and the dst bit size (presumably 8 - bits): discards the unwanted high bits
+                        .{ ._, ._r, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // sar by the same immediate: sign-extends the kept bits back across the byte
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store one result byte at the unscaled index
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // inc: index += 1 (inc does not update carry, so the zero flag is tested next)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // jnz: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .required_features = .{ .bmi2, .slow_incdec, null, null }, // needs bmi2 (for bzhi) and slow_incdec; the loop below steps its index with add instead of inc
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any }, // source: unsigned int elements, dword-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }}, // destination: unsigned int elements, byte-sized (same caveat)
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: bit-index operand for bzhi, set once before the loop
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: bzhi result for the current element
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the loop's arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ }, // loop-invariant: tmp1 = immediate derived from dst (presumably the dst element bit count - confirm)
+                        .{ .@"0:", ._, .bzhi, .tmp2d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), .tmp1d, ._ }, // "0:" loop head: bzhi reads the dword source element (index scaled by 4) and zeroes bits from bit index tmp1 upward
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp2b, ._, ._ }, // store the low byte of the masked value at the unscaled index
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // index += 1; carry is set exactly when the index wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jnc: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .required_features = .{ .bmi2, null, null, null }, // needs bmi2 (for bzhi); the loop below steps its index with inc
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any }, // source: unsigned int elements, dword-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }}, // destination: unsigned int elements, byte-sized (same caveat)
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: bit-index operand for bzhi, set once before the loop
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: bzhi result for the current element
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the loop's arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ }, // loop-invariant: tmp1 = immediate derived from dst (presumably the dst element bit count - confirm)
+                        .{ .@"0:", ._, .bzhi, .tmp2d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), .tmp1d, ._ }, // "0:" loop head: bzhi reads the dword source element (index scaled by 4) and zeroes bits from bit index tmp1 upward
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp2b, ._, ._ }, // store the low byte of the masked value at the unscaled index
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // inc: index += 1 (inc does not update carry, so the zero flag is tested next)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // jnz: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null }, // only for targets with the slow_incdec feature; the loop below steps its index with add instead of inc
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any }, // source: unsigned int elements, dword-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }}, // destination: unsigned int elements, byte-sized (same caveat)
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the loop's arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: load the dword source element (index scaled by 4)
+                        .{ ._, ._, .@"and", .tmp1b, .sa(.dst0, .add_umax), ._, ._ }, // mask the low byte with an immediate derived from dst (presumably the dst element's unsigned max - confirm)
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store one result byte at the unscaled index
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // index += 1; carry is set exactly when the index wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jnc: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any }, // no required features; source: unsigned int elements, dword-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }}, // destination: unsigned int elements, byte-sized (same caveat)
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the loop's arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: load the dword source element (index scaled by 4)
+                        .{ ._, ._, .@"and", .tmp1b, .sa(.dst0, .add_umax), ._, ._ }, // mask the low byte with an immediate derived from dst (presumably the dst element's unsigned max - confirm)
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store one result byte at the unscaled index
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // inc: index += 1 (inc does not update carry, so the zero flag is tested next)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // jnz: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null }, // only for targets with the slow_incdec feature; the loop below steps its index with add instead of inc
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any }, // source: int elements of either signedness, qword-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .byte, .is = 8 } }}, // destination: int elements of exactly 8 bits, so no masking or sign fix-up is needed
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the index arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: dword load at the start of the source element (index scaled by 8), i.e. its low half
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store only its low byte at the unscaled index
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // index += 1; carry is set exactly when the index wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jnc: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any }, // no required features; source: int elements of either signedness, qword-sized (NOTE(review): reading of .of/.is is assumed - confirm)
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .byte, .is = 8 } }}, // destination: int elements of exactly 8 bits, so no masking or sign fix-up is needed
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // the loop reads the source through memory operands
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: element scratch
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // result is stored to memory by the loop
+                    .clobbers = .{ .eflags = true }, // the index arithmetic modifies flags
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = negative index, presumably -(dst size in bytes); it counts up to zero
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ }, // "0:" loop head: dword load at the start of the source element (index scaled by 8), i.e. its low half
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store only its low byte at the unscaled index
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // inc: index += 1 (inc does not update carry, so the zero flag is tested next)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // jnz: repeat from "0:" until the index reaches zero
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null }, // Vector `@truncate` case (per the commit title), selected only with `slow_incdec`: the loop steps with `add` rather than inc.
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }, .any }, // NOTE(review): presumably signed qword-sized source elements -- confirm.
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } }}, // NOTE(review): presumably signed elements stored in one byte each (bit width may be below 8).
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index register
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } }, // tmp1: per-element scratch register
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start index; NOTE(review): presumably minus dst's unaligned byte size.
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ }, // label 0: load a dword from src, index tmp0 scaled by 8
+                        .{ ._, ._l, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // shift the low byte left (presumably `sal` by 8 minus dst's bit size)...
+                        .{ ._, ._r, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // ...then arithmetic-shift right by the same amount (presumably `sar`), sign-extending within the byte
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store the byte to dst at index tmp0
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // advance tmp0 by one; the add carries out when tmp0 wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat from label 0 while no carry (presumably `jnc`)
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }, .any }, // Vector `@truncate` case (per the commit title). NOTE(review): presumably signed qword-sized source elements -- confirm.
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } }}, // NOTE(review): presumably signed elements stored in one byte each (bit width may be below 8).
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index register
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } }, // tmp1: per-element scratch register
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start index; NOTE(review): presumably minus dst's unaligned byte size.
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ }, // label 0: load a dword from src, index tmp0 scaled by 8
+                        .{ ._, ._l, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // shift the low byte left (presumably `sal` by 8 minus dst's bit size)...
+                        .{ ._, ._r, .sa, .tmp1b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // ...then arithmetic-shift right by the same amount (presumably `sar`), sign-extending within the byte
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store the byte to dst at index tmp0
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // advance tmp0 by one (presumably assembles to `inc`)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // repeat from label 0 while tmp0 is nonzero (presumably `jnz`)
+                    } },
+                }, .{
+                    .required_features = .{ .bmi2, .slow_incdec, null, null }, // Vector `@truncate` case (per the commit title), selected with both `bmi2` (for bzhi) and `slow_incdec` (loop steps with `add`).
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any }, // NOTE(review): presumably unsigned qword-sized source elements -- confirm.
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }}, // NOTE(review): presumably unsigned elements stored in one byte each (bit width may be below 8).
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index register
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: loop-invariant bzhi index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: per-element scratch register
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start index; NOTE(review): presumably minus dst's unaligned byte size.
+                        .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ }, // tmp1 = bit index for bzhi, set once before the loop; NOTE(review): presumably dst's element bit size.
+                        .{ .@"0:", ._, .bzhi, .tmp2d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), .tmp1d, ._ }, // label 0: load a dword from src (index tmp0 scaled by 8) with bits at and above index tmp1 cleared
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp2b, ._, ._ }, // store the low byte to dst at index tmp0
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // advance tmp0 by one; the add carries out when tmp0 wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat from label 0 while no carry (presumably `jnc`)
+                    } },
+                }, .{
+                    .required_features = .{ .bmi2, null, null, null }, // Vector `@truncate` case (per the commit title), selected with `bmi2` (needed for bzhi below).
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any }, // NOTE(review): presumably unsigned qword-sized source elements -- confirm.
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }}, // NOTE(review): presumably unsigned elements stored in one byte each (bit width may be below 8).
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index register
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: loop-invariant bzhi index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: per-element scratch register
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start index; NOTE(review): presumably minus dst's unaligned byte size.
+                        .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ }, // tmp1 = bit index for bzhi, set once before the loop; NOTE(review): presumably dst's element bit size.
+                        .{ .@"0:", ._, .bzhi, .tmp2d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), .tmp1d, ._ }, // label 0: load a dword from src (index tmp0 scaled by 8) with bits at and above index tmp1 cleared
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp2b, ._, ._ }, // store the low byte to dst at index tmp0
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // advance tmp0 by one (presumably assembles to `inc`)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // repeat from label 0 while tmp0 is nonzero (presumably `jnz`)
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null }, // Vector `@truncate` case (per the commit title), selected only with `slow_incdec`: the loop steps with `add` rather than inc.
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any }, // NOTE(review): presumably unsigned qword-sized source elements -- confirm.
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }}, // NOTE(review): presumably unsigned elements stored in one byte each (bit width may be below 8).
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index register
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: per-element scratch register
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start index; NOTE(review): presumably minus dst's unaligned byte size.
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ }, // label 0: load a dword from src, index tmp0 scaled by 8
+                        .{ ._, ._, .@"and", .tmp1b, .sa(.dst0, .add_umax), ._, ._ }, // mask the low byte with an immediate; NOTE(review): presumably the unsigned max of dst's element type.
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store the masked byte to dst at index tmp0
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // advance tmp0 by one; the add carries out when tmp0 wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat from label 0 while no carry (presumably `jnc`)
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any }, // Vector `@truncate` case (per the commit title). NOTE(review): presumably unsigned qword-sized source elements -- confirm.
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }}, // NOTE(review): presumably unsigned elements stored in one byte each (bit width may be below 8).
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop index register
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: per-element scratch register
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start index; NOTE(review): presumably minus dst's unaligned byte size.
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ }, // label 0: load a dword from src, index tmp0 scaled by 8
+                        .{ ._, ._, .@"and", .tmp1b, .sa(.dst0, .add_umax), ._, ._ }, // mask the low byte with an immediate; NOTE(review): presumably the unsigned max of dst's element type.
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp1b, ._, ._ }, // store the masked byte to dst at index tmp0
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // advance tmp0 by one (presumably assembles to `inc`)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // repeat from label 0 while tmp0 is nonzero (presumably `jnz`)
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null }, // Vector `@truncate` case (per the commit title), selected only with `slow_incdec`: the loop steps with `add` rather than inc.
+                    .src_constraints = .{ .any_scalar_int, .any }, // Accepts any scalar-int source; the source offset is tracked separately (tmp1) instead of scaling the dst index.
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .byte, .is = 8 } }}, // NOTE(review): presumably byte elements that are exactly 8 bits wide; no masking/shift is emitted below.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: dst loop index register
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: running offset into src
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: per-element scratch register
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start index; NOTE(review): presumably minus dst's unaligned byte size.
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0 (src offset starts at the first element)
+                        .{ .@"0:", ._, .mov, .tmp2d, .memi(.src0d, .tmp1), ._, ._ }, // label 0: load a dword from src at offset tmp1
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp2b, ._, ._ }, // store its low byte to dst at index tmp0
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // step tmp1 via lea (flags untouched); NOTE(review): presumably by the src element size.
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // advance tmp0 by one; the add carries out when tmp0 wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat from label 0 while no carry (presumably `jnc`)
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_scalar_int, .any }, // Vector `@truncate` case (per the commit title). Accepts any scalar-int source; the source offset is tracked separately (tmp1).
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .byte, .is = 8 } }}, // NOTE(review): presumably byte elements that are exactly 8 bits wide; no masking/shift is emitted below.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: dst loop index register
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: running offset into src
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: per-element scratch register
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start index; NOTE(review): presumably minus dst's unaligned byte size.
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0 (src offset starts at the first element)
+                        .{ .@"0:", ._, .mov, .tmp2d, .memi(.src0d, .tmp1), ._, ._ }, // label 0: load a dword from src at offset tmp1
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp2b, ._, ._ }, // store its low byte to dst at index tmp0
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // step tmp1 via lea (flags untouched); NOTE(review): presumably by the src element size.
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // advance tmp0 by one (presumably assembles to `inc`)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // repeat from label 0 while tmp0 is nonzero (presumably `jnz`)
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null }, // Vector `@truncate` case (per the commit title), selected only with `slow_incdec`: the loop steps with `add` rather than inc.
+                    .src_constraints = .{ .any_scalar_signed_int, .any }, // Accepts any signed scalar-int source; the source offset is tracked separately (tmp1).
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } }}, // NOTE(review): presumably signed elements stored in one byte each (bit width may be below 8).
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: dst loop index register
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: running offset into src
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } }, // tmp2: per-element scratch register
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start index; NOTE(review): presumably minus dst's unaligned byte size.
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0 (src offset starts at the first element)
+                        .{ .@"0:", ._, .mov, .tmp2d, .memi(.src0d, .tmp1), ._, ._ }, // label 0: load a dword from src at offset tmp1
+                        .{ ._, ._l, .sa, .tmp2b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // shift the low byte left (presumably `sal` by 8 minus dst's bit size)...
+                        .{ ._, ._r, .sa, .tmp2b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // ...then arithmetic-shift right by the same amount (presumably `sar`), sign-extending within the byte
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp2b, ._, ._ }, // store the byte to dst at index tmp0
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // step tmp1 via lea (flags untouched); NOTE(review): presumably by the src element size.
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // advance tmp0 by one; the add carries out when tmp0 wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat from label 0 while no carry (presumably `jnc`)
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_scalar_signed_int, .any }, // Vector `@truncate` case (per the commit title). Accepts any signed scalar-int source; the source offset is tracked separately (tmp1).
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } }}, // NOTE(review): presumably signed elements stored in one byte each (bit width may be below 8).
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: dst loop index register
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: running offset into src
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } }, // tmp2: per-element scratch register
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start index; NOTE(review): presumably minus dst's unaligned byte size.
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0 (src offset starts at the first element)
+                        .{ .@"0:", ._, .mov, .tmp2d, .memi(.src0d, .tmp1), ._, ._ }, // label 0: load a dword from src at offset tmp1
+                        .{ ._, ._l, .sa, .tmp2b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // shift the low byte left (presumably `sal` by 8 minus dst's bit size)...
+                        .{ ._, ._r, .sa, .tmp2b, .uia(8, .dst0, .sub_bit_size), ._, ._ }, // ...then arithmetic-shift right by the same amount (presumably `sar`), sign-extending within the byte
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp2b, ._, ._ }, // store the byte to dst at index tmp0
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // step tmp1 via lea (flags untouched); NOTE(review): presumably by the src element size.
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // advance tmp0 by one (presumably assembles to `inc`)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // repeat from label 0 while tmp0 is nonzero (presumably `jnz`)
+                    } },
+                }, .{
+                    .required_features = .{ .bmi2, .slow_incdec, null, null }, // Vector `@truncate` case (per the commit title), selected with both `bmi2` (for bzhi) and `slow_incdec` (loop steps with `add`).
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any }, // Accepts any unsigned scalar-int source; the source offset is tracked separately (tmp1).
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }}, // NOTE(review): presumably unsigned elements stored in one byte each (bit width may be below 8).
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: dst loop index register
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: running offset into src
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: loop-invariant bzhi index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp3: per-element scratch register
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start index; NOTE(review): presumably minus dst's unaligned byte size.
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0 (src offset starts at the first element)
+                        .{ ._, ._, .mov, .tmp2d, .sa(.dst0, .add_bit_size), ._, ._ }, // tmp2 = bit index for bzhi, set once before the loop; NOTE(review): presumably dst's element bit size.
+                        .{ .@"0:", ._, .bzhi, .tmp3d, .memi(.src0d, .tmp1), .tmp2d, ._ }, // label 0: load a dword from src at offset tmp1 with bits at and above index tmp2 cleared
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp3b, ._, ._ }, // store the low byte to dst at index tmp0
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // step tmp1 via lea (flags untouched); NOTE(review): presumably by the src element size.
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // advance tmp0 by one; the add carries out when tmp0 wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat from label 0 while no carry (presumably `jnc`)
+                    } },
+                }, .{
+                    .required_features = .{ .bmi2, null, null, null }, // Vector `@truncate` case (per the commit title), selected with `bmi2` (needed for bzhi below).
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any }, // Accepts any unsigned scalar-int source; the source offset is tracked separately (tmp1).
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }}, // NOTE(review): presumably unsigned elements stored in one byte each (bit width may be below 8).
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: dst loop index register
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: running offset into src
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: loop-invariant bzhi index
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp3: per-element scratch register
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start index; NOTE(review): presumably minus dst's unaligned byte size.
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0 (src offset starts at the first element)
+                        .{ ._, ._, .mov, .tmp2d, .sa(.dst0, .add_bit_size), ._, ._ }, // tmp2 = bit index for bzhi, set once before the loop; NOTE(review): presumably dst's element bit size.
+                        .{ .@"0:", ._, .bzhi, .tmp3d, .memi(.src0d, .tmp1), .tmp2d, ._ }, // label 0: load a dword from src at offset tmp1 with bits at and above index tmp2 cleared
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp3b, ._, ._ }, // store the low byte to dst at index tmp0
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // step tmp1 via lea (flags untouched); NOTE(review): presumably by the src element size.
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // advance tmp0 by one (presumably assembles to `inc`)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // repeat from label 0 while tmp0 is nonzero (presumably `jnz`)
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null }, // Vector `@truncate` case (per the commit title), selected only with `slow_incdec`: the loop steps with `add` rather than inc.
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any }, // Accepts any unsigned scalar-int source; the source offset is tracked separately (tmp1).
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }}, // NOTE(review): presumably unsigned elements stored in one byte each (bit width may be below 8).
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: dst loop index register
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: running offset into src
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: per-element scratch register
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start index; NOTE(review): presumably minus dst's unaligned byte size.
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0 (src offset starts at the first element)
+                        .{ .@"0:", ._, .mov, .tmp2d, .memi(.src0d, .tmp1), ._, ._ }, // label 0: load a dword from src at offset tmp1
+                        .{ ._, ._, .@"and", .tmp2b, .sa(.dst0, .add_umax), ._, ._ }, // mask the low byte with an immediate; NOTE(review): presumably the unsigned max of dst's element type.
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp2b, ._, ._ }, // store the masked byte to dst at index tmp0
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // step tmp1 via lea (flags untouched); NOTE(review): presumably by the src element size.
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, // advance tmp0 by one; the add carries out when tmp0 wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat from label 0 while no carry (presumably `jnc`)
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any }, // Vector `@truncate` case (per the commit title). Accepts any unsigned scalar-int source; the source offset is tracked separately (tmp1).
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } }}, // NOTE(review): presumably unsigned elements stored in one byte each (bit width may be below 8).
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: dst loop index register
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: running offset into src
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: per-element scratch register
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start index; NOTE(review): presumably minus dst's unaligned byte size.
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0 (src offset starts at the first element)
+                        .{ .@"0:", ._, .mov, .tmp2d, .memi(.src0d, .tmp1), ._, ._ }, // label 0: load a dword from src at offset tmp1
+                        .{ ._, ._, .@"and", .tmp2b, .sa(.dst0, .add_umax), ._, ._ }, // mask the low byte with an immediate; NOTE(review): presumably the unsigned max of dst's element type.
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_unaligned_size), .tmp2b, ._, ._ }, // store the masked byte to dst at index tmp0
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // step tmp1 via lea (flags untouched); NOTE(review): presumably by the src element size.
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ }, // advance tmp0 by one (presumably assembles to `inc`)
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // repeat from label 0 while tmp0 is nonzero (presumably `jnz`)
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null }, // Vector `@truncate` case (per the commit title), selected with `avx`: three-operand shifts, no loop.
+                    .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .word } }, .any }, // NOTE(review): presumably a signed vector that fits one xword with word-sized elements -- confirm.
+                    .dst_constraints = .{.{ .scalar_signed_int = .{ .of = .xword, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_w, .sll, .dst0x, .src0x, .uia(16, .dst0, .sub_bit_size), ._ }, // dst = src with each word shifted left (presumably `vpsllw` by 16 minus dst's bit size)
+                        .{ ._, .vp_w, .sra, .dst0x, .dst0x, .uia(16, .dst0, .sub_bit_size), ._ }, // arithmetic-shift each word back right by the same amount (presumably `vpsraw`), sign-extending within the word
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null }, // Vector `@truncate` case (per the commit title), selected with `avx`: a single three-operand AND against a mask, no loop.
+                    .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } }, .any }, // NOTE(review): presumably an unsigned vector that fits one xword with word-sized elements -- confirm.
+                    .dst_constraints = .{.{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: holds the address of the mask
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: NOTE(review): presumably an in-memory constant of dst's per-element unsigned max.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of tmp1's memory
+                        .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ }, // dst = src AND the xword at [tmp0] (presumably `vpand`)
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null }, // Vector `@truncate` case (per the commit title), selected with `sse2`: two-operand shifts applied in place.
+                    .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .word } }, .any }, // NOTE(review): presumably a signed vector that fits one xword with word-sized elements -- confirm.
+                    .dst_constraints = .{.{ .scalar_signed_int = .{ .of = .xword, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .none } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }}, // dst refers to src0, so the shifts below modify the source register
+                    .each = .{ .once = &.{
+                        .{ ._, .p_w, .sll, .dst0x, .uia(16, .dst0, .sub_bit_size), ._, ._ }, // shift each word left (presumably `psllw` by 16 minus dst's bit size)
+                        .{ ._, .p_w, .sra, .dst0x, .uia(16, .dst0, .sub_bit_size), ._, ._ }, // arithmetic-shift each word back right by the same amount (presumably `psraw`), sign-extending within the word
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null }, // Vector `@truncate` case (per the commit title), selected with `sse2`: a two-operand AND against a mask, applied in place.
+                    .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } }, .any }, // NOTE(review): presumably an unsigned vector that fits one xword with word-sized elements -- confirm.
+                    .dst_constraints = .{.{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: holds the address of the mask
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: NOTE(review): presumably an in-memory constant of dst's per-element unsigned max.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }}, // dst refers to src0, so the AND below modifies the source register
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of tmp1's memory
+                        .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, // dst &= the xword at [tmp0] (presumably `pand`)
+                    } },
+                }, .{
+                    .required_features = .{ .sse, null, null, null }, // Vector `@truncate` case (per the commit title), selected with only `sse`: uses the `_ps` form of AND.
+                    .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } }, .any }, // NOTE(review): presumably an unsigned vector that fits one xword with word-sized elements -- confirm.
+                    .dst_constraints = .{.{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: holds the address of the mask
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: NOTE(review): presumably an in-memory constant of dst's per-element unsigned max.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }}, // dst refers to src0, so the AND below modifies the source register
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of tmp1's memory
+                        .{ ._, ._ps, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, // dst &= the xword at [tmp0] (presumably `andps`, a bitwise AND usable without sse2)
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null }, // Vector `@truncate` case (per the commit title), selected with `avx2`: yword-wide three-operand shifts, no loop.
+                    .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .yword, .is = .word } }, .any }, // NOTE(review): presumably a signed vector that fits one yword with word-sized elements -- confirm.
+                    .dst_constraints = .{.{ .scalar_signed_int = .{ .of = .yword, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_w, .sll, .dst0y, .src0y, .uia(16, .dst0, .sub_bit_size), ._ }, // dst = src with each word shifted left (presumably `vpsllw` by 16 minus dst's bit size)
+                        .{ ._, .vp_w, .sra, .dst0y, .dst0y, .uia(16, .dst0, .sub_bit_size), ._ }, // arithmetic-shift each word back right by the same amount (presumably `vpsraw`), sign-extending within the word
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null }, // Vector `@truncate` case (per the commit title), selected with `avx2`: a single yword-wide AND against a mask, no loop.
+                    .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .yword, .is = .word } }, .any }, // NOTE(review): presumably an unsigned vector that fits one yword with word-sized elements -- confirm.
+                    .dst_constraints = .{.{ .scalar_unsigned_int = .{ .of = .yword, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: holds the address of the mask
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: NOTE(review): presumably an in-memory constant of dst's per-element unsigned max.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of tmp1's memory
+                        .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, // dst = src AND the yword at [tmp0] (presumably `vpand`)
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null }, // Vector `@truncate` case (per the commit title), selected with `avx2`: memory-to-memory loop handling one yword (32 bytes) per iteration.
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .word } }, .any }, // NOTE(review): presumably a signed vector spanning a multiple of ywords with word-sized elements -- confirm.
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop byte offset register
+                        .{ .type = .vector_16_i16, .kind = .{ .rc = .sse } }, // tmp1: yword scratch register holding 16 words
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = start offset; NOTE(review): presumably minus dst's unaligned byte size.
+                        .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._, ._ }, // label 0: load one yword from src at offset tmp0 (presumably `vmovdqa`)
+                        .{ ._, .vp_w, .sll, .tmp1y, .tmp1y, .uia(16, .dst0, .sub_bit_size), ._ }, // shift each word left (presumably `vpsllw` by 16 minus dst's bit size)
+                        .{ ._, .vp_w, .sra, .tmp1y, .tmp1y, .uia(16, .dst0, .sub_bit_size), ._ }, // arithmetic-shift each word back right by the same amount (presumably `vpsraw`), sign-extending within the word
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp1y, ._, ._ }, // store the yword to dst at offset tmp0
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, // advance tmp0 by 32 bytes (one yword); the add carries out when tmp0 wraps to zero
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // repeat from label 0 while no carry (presumably `jnc`)
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .word } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_u16, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_16_u16, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp1y, .lea(.tmp0y), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_, .@"and", .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp2y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_8_i16, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .vp_w, .sll, .tmp1x, .tmp1x, .uia(16, .dst0, .sub_bit_size), ._ },
+                        .{ ._, .vp_w, .sra, .tmp1x, .tmp1x, .uia(16, .dst0, .sub_bit_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_, .@"and", .tmp2x, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_8_i16, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .p_w, .sll, .tmp1x, .uia(16, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, .p_w, .sra, .tmp1x, .uia(16, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp2x, .tmp1x, ._, ._ },
+                        .{ ._, .p_, .@"and", .tmp2x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+                        .{ ._, ._ps, .mova, .tmp1x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._ps, .mova, .tmp2x, .tmp1x, ._, ._ },
+                        .{ ._, ._ps, .@"and", .tmp2x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .word, .is = 16 } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._l, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._r, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .fast_imm16, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .@"and", .tmp1w, .sa(.dst0, .add_umax), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .@"and", .tmp1d, .sa(.dst0, .add_umax), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .word, .is = 16 } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"2", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"2", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._l, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._r, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .bmi2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ },
+                        .{ .@"0:", ._, .bzhi, .tmp2d, .memsia(.src0d, .@"2", .tmp0, .add_unaligned_size), .tmp1d, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp2w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .fast_imm16, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"2", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .@"and", .tmp1w, .sa(.dst0, .add_umax), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"2", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .@"and", .tmp1d, .sa(.dst0, .add_umax), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .word, .is = 16 } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._l, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._r, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .bmi2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ },
+                        .{ .@"0:", ._, .bzhi, .tmp2d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), .tmp1d, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp2w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .fast_imm16, null, null, null }, // variant gated on fast_imm16: narrows each source element to a word using a 16-bit `and`
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte offset shared by the source and destination addressing
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, // tmp1: element being narrowed
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 (presumably the negated dst byte size, so the offset counts up toward zero)
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ }, // 0: load a dword from the source; index scaled by 4, so each 2-byte step of tmp0 moves 8 source bytes
+                        .{ ._, ._, .@"and", .tmp1w, .sa(.dst0, .add_umax), ._, ._ }, // 16-bit mask with an immediate derived from dst0 (presumably the dst type's maximum value)
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ }, // store the low word into the destination at offset tmp0
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance by one destination word (2 bytes)
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to 0: while the add did not carry
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any }, // no required features: same loop as the fast_imm16 variant but masks with a 32-bit `and`
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte offset shared by the source and destination addressing
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, // tmp1: element being narrowed
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 (presumably the negated dst byte size)
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ }, // 0: load a dword from the source; index scaled by 4 (8 source bytes per 2-byte step)
+                        .{ ._, ._, .@"and", .tmp1d, .sa(.dst0, .add_umax), ._, ._ }, // 32-bit mask with an immediate derived from dst0 (presumably the dst type's maximum value)
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ }, // store the low word into the destination at offset tmp0
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance by one destination word (2 bytes)
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to 0: while the add did not carry
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .xword } }, .any }, // xword-sized source elements, either signedness
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .word, .is = 16 } }}, // dst is constrained to exactly 16 bits; the body below applies no mask or sign fix-up
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte offset shared by the source and destination addressing
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, // tmp1: element being copied
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 (presumably the negated dst byte size)
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ }, // 0: load a dword from the source; index scaled by 8, so each 2-byte step of tmp0 moves 16 source bytes
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ }, // store only the low word; the upper bits are dropped by the 16-bit store
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance by one destination word (2 bytes)
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to 0: while the add did not carry
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any }, // signed xword-sized source elements narrowed to signed word-sized elements
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte offset shared by the source and destination addressing
+                        .{ .type = .i16, .kind = .{ .rc = .general_purpose } }, // tmp1: element being narrowed
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 (presumably the negated dst byte size)
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ }, // 0: load a dword from the source; index scaled by 8 (16 source bytes per 2-byte step)
+                        .{ ._, ._l, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ }, // shift left in the 32-bit register; count presumably 32 minus the dst bit width
+                        .{ ._, ._r, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ }, // arithmetic shift right by the same count, replicating the new sign bit
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ }, // store the low word into the destination at offset tmp0
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance by one destination word (2 bytes)
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to 0: while the add did not carry
+                    } },
+                }, .{
+                    .required_features = .{ .bmi2, null, null, null }, // BMI2 variant: clears the high bits with bzhi while loading
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte offset shared by the source and destination addressing
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: bit index operand for bzhi
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, // tmp2: narrowed element
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 (presumably the negated dst byte size)
+                        .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ }, // tmp1 = immediate derived from dst0 (presumably the dst bit width); set once before the loop
+                        .{ .@"0:", ._, .bzhi, .tmp2d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), .tmp1d, ._ }, // 0: load a source dword (index scaled by 8) and zero the bits from position tmp1 upward
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp2w, ._, ._ }, // store the low word into the destination at offset tmp0
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance by one destination word (2 bytes)
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to 0: while the add did not carry
+                    } },
+                }, .{
+                    .required_features = .{ .fast_imm16, null, null, null }, // variant gated on fast_imm16: masks with a 16-bit `and`
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte offset shared by the source and destination addressing
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, // tmp1: element being narrowed
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 (presumably the negated dst byte size)
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ }, // 0: load a dword from the source; index scaled by 8 (16 source bytes per 2-byte step)
+                        .{ ._, ._, .@"and", .tmp1w, .sa(.dst0, .add_umax), ._, ._ }, // 16-bit mask with an immediate derived from dst0 (presumably the dst type's maximum value)
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ }, // store the low word into the destination at offset tmp0
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance by one destination word (2 bytes)
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to 0: while the add did not carry
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any }, // no required features: same loop as the fast_imm16 variant but masks with a 32-bit `and`
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte offset shared by the source and destination addressing
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, // tmp1: element being narrowed
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 (presumably the negated dst byte size)
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ }, // 0: load a dword from the source; index scaled by 8 (16 source bytes per 2-byte step)
+                        .{ ._, ._, .@"and", .tmp1d, .sa(.dst0, .add_umax), ._, ._ }, // 32-bit mask with an immediate derived from dst0 (presumably the dst type's maximum value)
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ }, // store the low word into the destination at offset tmp0
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance by one destination word (2 bytes)
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to 0: while the add did not carry
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_scalar_int, .any }, // generic variant: source offset is tracked in its own register instead of a fixed index scale
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .word, .is = 16 } }}, // dst is constrained to exactly 16 bits; the body below applies no mask or sign fix-up
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: destination byte offset
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: source byte offset
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, // tmp2: element being copied
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 (presumably the negated dst byte size)
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0: source offset starts at the first element
+                        .{ .@"0:", ._, .mov, .tmp2d, .memi(.src0d, .tmp1), ._, ._ }, // 0: load a dword from the source at offset tmp1
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp2w, ._, ._ }, // store only the low word into the destination at offset tmp0
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // advance the source offset (by the source element size, per the operand name)
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance by one destination word (2 bytes)
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to 0: while the add did not carry
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_scalar_signed_int, .any }, // generic signed variant: separate source offset, sign fix-up via a shift pair
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: destination byte offset
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: source byte offset
+                        .{ .type = .i16, .kind = .{ .rc = .general_purpose } }, // tmp2: element being narrowed
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 (presumably the negated dst byte size)
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0: source offset starts at the first element
+                        .{ .@"0:", ._, .mov, .tmp2d, .memi(.src0d, .tmp1), ._, ._ }, // 0: load a dword from the source at offset tmp1
+                        .{ ._, ._l, .sa, .tmp2d, .uia(32, .dst0, .sub_bit_size), ._, ._ }, // shift left in the 32-bit register; count presumably 32 minus the dst bit width
+                        .{ ._, ._r, .sa, .tmp2d, .uia(32, .dst0, .sub_bit_size), ._, ._ }, // arithmetic shift right by the same count, replicating the new sign bit
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp2w, ._, ._ }, // store the low word into the destination at offset tmp0
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // advance the source offset (by the source element size, per the operand name)
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance by one destination word (2 bytes)
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to 0: while the add did not carry
+                    } },
+                }, .{
+                    .required_features = .{ .bmi2, null, null, null }, // BMI2 generic unsigned variant: separate source offset, high bits cleared with bzhi
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: destination byte offset
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: source byte offset
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: bit index operand for bzhi
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, // tmp3: narrowed element
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 (presumably the negated dst byte size)
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0: source offset starts at the first element
+                        .{ ._, ._, .mov, .tmp2d, .sa(.dst0, .add_bit_size), ._, ._ }, // tmp2 = immediate derived from dst0 (presumably the dst bit width); set once before the loop
+                        .{ .@"0:", ._, .bzhi, .tmp3d, .memi(.src0d, .tmp1), .tmp2d, ._ }, // 0: load a source dword at offset tmp1 and zero the bits from position tmp2 upward
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp3w, ._, ._ }, // store the low word into the destination at offset tmp0
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // advance the source offset (by the source element size, per the operand name)
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance by one destination word (2 bytes)
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to 0: while the add did not carry
+                    } },
+                }, .{
+                    .required_features = .{ .fast_imm16, null, null, null }, // generic unsigned variant gated on fast_imm16: masks with a 16-bit `and`
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: destination byte offset
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: source byte offset
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, // tmp2: element being narrowed
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 (presumably the negated dst byte size)
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0: source offset starts at the first element
+                        .{ .@"0:", ._, .mov, .tmp2d, .memi(.src0d, .tmp1), ._, ._ }, // 0: load a dword from the source at offset tmp1
+                        .{ ._, ._, .@"and", .tmp2w, .sa(.dst0, .add_umax), ._, ._ }, // 16-bit mask with an immediate derived from dst0 (presumably the dst type's maximum value)
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp2w, ._, ._ }, // store the low word into the destination at offset tmp0
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // advance the source offset (by the source element size, per the operand name)
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance by one destination word (2 bytes)
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to 0: while the add did not carry
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any }, // generic unsigned variant with no required features: masks with a 32-bit `and`
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: destination byte offset
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: source byte offset
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, // tmp2: element being narrowed
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 (presumably the negated dst byte size)
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0: source offset starts at the first element
+                        .{ .@"0:", ._, .mov, .tmp2d, .memi(.src0d, .tmp1), ._, ._ }, // 0: load a dword from the source at offset tmp1
+                        .{ ._, ._, .@"and", .tmp2d, .sa(.dst0, .add_umax), ._, ._ }, // 32-bit mask with an immediate derived from dst0 (presumably the dst type's maximum value)
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp2w, ._, ._ }, // store the low word into the destination at offset tmp0
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // advance the source offset (by the source element size, per the operand name)
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, // advance by one destination word (2 bytes)
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to 0: while the add did not carry
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null }, // AVX, single xmm register: re-sign-extends each 32-bit lane with a shift pair
+                    .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .scalar_signed_int = .{ .of = .xword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_d, .sll, .dst0x, .src0x, .uia(32, .dst0, .sub_bit_size), ._ }, // shift each dword lane left; count presumably 32 minus the dst bit width
+                        .{ ._, .vp_d, .sra, .dst0x, .dst0x, .uia(32, .dst0, .sub_bit_size), ._ }, // arithmetic shift right by the same count, replicating each lane's new sign bit
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null }, // AVX, single xmm register: masks the vector with a constant held in memory
+                    .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: address of the mask constant
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: memory constant of kind umax_mem tied to dst0 (presumably per-element maximum values)
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant
+                        .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ }, // dst = src AND the 16-byte constant at [tmp0]
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null }, // SSE2, single xmm register: two-operand shifts applied in place on the source register
+                    .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .scalar_signed_int = .{ .of = .xword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .none } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }}, // the result reuses src0
+                    .each = .{ .once = &.{
+                        .{ ._, .p_d, .sll, .dst0x, .uia(32, .dst0, .sub_bit_size), ._, ._ }, // shift each dword lane left; count presumably 32 minus the dst bit width
+                        .{ ._, .p_d, .sra, .dst0x, .uia(32, .dst0, .sub_bit_size), ._, ._ }, // arithmetic shift right by the same count, replicating each lane's new sign bit
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null }, // SSE2, single xmm register: masks in place with a constant held in memory
+                    .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: address of the mask constant
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: memory constant of kind umax_mem tied to dst0 (presumably per-element maximum values)
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }}, // the result reuses src0
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant
+                        .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, // dst &= the 16-byte constant at [tmp0] (integer-domain and)
+                    } },
+                }, .{
+                    .required_features = .{ .sse, null, null, null }, // SSE-only variant: same masking as the SSE2 variant but using the `_ps` form of `and`
+                    .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: address of the mask constant
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: memory constant of kind umax_mem tied to dst0 (presumably per-element maximum values)
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }}, // the result reuses src0
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant
+                        .{ ._, ._ps, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, // dst &= the 16-byte constant at [tmp0]
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null }, // AVX2, single ymm register: re-sign-extends each 32-bit lane with a shift pair
+                    .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .yword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .scalar_signed_int = .{ .of = .yword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_d, .sll, .dst0y, .src0y, .uia(32, .dst0, .sub_bit_size), ._ }, // shift each dword lane left; count presumably 32 minus the dst bit width
+                        .{ ._, .vp_d, .sra, .dst0y, .dst0y, .uia(32, .dst0, .sub_bit_size), ._ }, // arithmetic shift right by the same count, replicating each lane's new sign bit
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null }, // AVX2, single ymm register: masks the vector with a constant held in memory
+                    .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .yword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .scalar_unsigned_int = .{ .of = .yword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, // tmp0: address of the mask constant
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } }, // tmp1: memory constant of kind umax_mem tied to dst0 (presumably per-element maximum values)
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, // tmp0 = address of the tmp1 constant
+                        .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, // dst = src AND the 32-byte constant at [tmp0]
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null }, // AVX2 loop over memory: re-sign-extends 32-bit lanes, one ymm (32 bytes) per iteration
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte offset shared by the source and destination addressing
+                        .{ .type = .vector_8_i32, .kind = .{ .rc = .sse } }, // tmp1: the 8 lanes being processed
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 (presumably the negated dst byte size)
+                        .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._, ._ }, // 0: load 32 bytes of source lanes
+                        .{ ._, .vp_d, .sll, .tmp1y, .tmp1y, .uia(32, .dst0, .sub_bit_size), ._ }, // dword lanes are 32 bits wide, so the count base is 32 (was 16), matching the single-register dword variants
+                        .{ ._, .vp_d, .sra, .tmp1y, .tmp1y, .uia(32, .dst0, .sub_bit_size), ._ }, // arithmetic shift right by the same count, replicating each lane's new sign bit
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp1y, ._, ._ }, // store the 32 bytes into the destination at offset tmp0
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, // advance by one ymm (32 bytes)
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to 0: while the add did not carry
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null }, // AVX2 loop over memory: masks 32-bit lanes, one ymm (32 bytes) per iteration
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first the mask address, then the loop byte offset
+                        .{ .type = .vector_8_u32, .kind = .{ .rc = .sse } }, // tmp1: mask, loaded once before the loop
+                        .{ .type = .vector_8_u32, .kind = .{ .rc = .sse } }, // tmp2: masked lanes for the current iteration
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: memory constant of kind umax_mem tied to dst0 (presumably per-element maximum values)
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of the tmp3 constant
+                        .{ ._, .v_dqa, .mov, .tmp1y, .lea(.tmp0y), ._, ._ }, // tmp1 = the 32-byte mask at [tmp0]
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 is reused as the loop offset (presumably starting at the negated dst byte size)
+                        .{ .@"0:", .vp_, .@"and", .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._ }, // 0: tmp2 = mask AND 32 bytes of source lanes
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp2y, ._, ._ }, // store the 32 bytes into the destination at offset tmp0
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, // advance by one ymm (32 bytes)
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to 0: while the add did not carry
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null }, // AVX loop over memory: re-sign-extends 32-bit lanes, one xmm (16 bytes) per iteration
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: byte offset shared by the source and destination addressing
+                        .{ .type = .vector_4_i32, .kind = .{ .rc = .sse } }, // tmp1: the 4 lanes being processed
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 (presumably the negated dst byte size)
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, // 0: load 16 bytes of source lanes
+                        .{ ._, .vp_d, .sll, .tmp1x, .tmp1x, .uia(32, .dst0, .sub_bit_size), ._ }, // dword lanes are 32 bits wide, so the count base is 32 (was 16), matching the single-register dword variants
+                        .{ ._, .vp_d, .sra, .tmp1x, .tmp1x, .uia(32, .dst0, .sub_bit_size), ._ }, // arithmetic shift right by the same count, replicating each lane's new sign bit
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, // store the 16 bytes into the destination at offset tmp0
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance by one xmm (16 bytes)
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to 0: while the add did not carry
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null }, // AVX loop over memory: masks 32-bit lanes, one xmm (16 bytes) per iteration
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: first the mask address, then the loop byte offset
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, // tmp1: mask, loaded once before the loop
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, // tmp2: masked lanes for the current iteration
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } }, // tmp3: memory constant of kind umax_mem tied to dst0 (presumably per-element maximum values)
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, // tmp0 = address of the tmp3 constant
+                        .{ ._, .v_dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ }, // tmp1 = the 16-byte mask at [tmp0]
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 is reused as the loop offset (presumably starting at the negated dst byte size)
+                        .{ .@"0:", .vp_, .@"and", .tmp2x, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._ }, // 0: tmp2 = mask AND 16 bytes of source lanes
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ }, // store the 16 bytes into the destination at offset tmp0
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, // advance by one xmm (16 bytes)
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // jump back to 0: while the add did not carry
+                    } },
+                }, .{
+                    // SSE2 pattern for a memory source whose signed integer elements are
+                    // dword-sized and grouped into xword (16-byte) chunks, producing a
+                    // memory destination of the same layout. Each loaded chunk is shifted
+                    // left and then arithmetically right by the same count, which
+                    // sign-extends every 32-bit lane from the destination bit width.
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_i32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        // tmp0 is the running byte offset (presumably starting at the negated
+                        // size of dst0); the loop repeats until the `add` below carries.
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        // The `p_d` shifts work on 32-bit lanes, so the count must be
+                        // 32 - bit_size, matching the scalar dword patterns in this table.
+                        // 16 - bit_size is the count for 16-bit lanes and goes negative once
+                        // the element is wider than 16 bits.
+                        // NOTE(review): the VEX-encoded `vp_d` shift pair in the entry just
+                        // before the xword/dword group uses the same 16-based count and
+                        // looks like it needs the same correction; confirm and fix together.
+                        .{ ._, .p_d, .sll, .tmp1x, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, .p_d, .sra, .tmp1x, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ },
+                        // Advance one 16-byte chunk; keep looping while there is no carry.
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp2x, .tmp1x, ._, ._ },
+                        .{ ._, .p_, .@"and", .tmp2x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+                        .{ ._, ._ps, .mova, .tmp1x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._ps, .mova, .tmp2x, .tmp1x, ._, ._ },
+                        .{ ._, ._ps, .@"and", .tmp2x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .dword, .is = 32 } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._l, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._r, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .bmi2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ },
+                        .{ .@"0:", ._, .bzhi, .tmp2d, .memia(.src0d, .tmp0, .add_unaligned_size), .tmp1d, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp2d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memia(.src0d, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .@"and", .tmp1d, .sa(.dst0, .add_umax), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .dword, .is = 32 } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"2", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"2", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._l, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._r, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .bmi2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ },
+                        .{ .@"0:", ._, .bzhi, .tmp2d, .memsia(.src0d, .@"2", .tmp0, .add_unaligned_size), .tmp1d, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp2d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"2", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .@"and", .tmp1d, .sa(.dst0, .add_umax), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .xword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .dword, .is = 32 } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._l, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._r, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .bmi2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ },
+                        .{ .@"0:", ._, .bzhi, .tmp2d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), .tmp1d, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp2d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .@"and", .tmp1d, .sa(.dst0, .add_umax), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .yword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .dword, .is = 32 } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .yword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._l, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._r, .sa, .tmp1d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .bmi2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .yword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ },
+                        .{ .@"0:", ._, .bzhi, .tmp2d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), .tmp1d, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp2d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .yword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"8", .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .@"and", .tmp1d, .sa(.dst0, .add_umax), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_scalar_int, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .dword, .is = 32 } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp2d, .memi(.src0d, .tmp1), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp2d, ._, ._ },
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_scalar_signed_int, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp2d, .memi(.src0d, .tmp1), ._, ._ },
+                        .{ ._, ._l, .sa, .tmp2d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._r, .sa, .tmp2d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp2d, ._, ._ },
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .bmi2, null, null, null },
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sa(.dst0, .add_bit_size), ._, ._ },
+                        .{ .@"0:", ._, .bzhi, .tmp3d, .memi(.src0d, .tmp1), .tmp2d, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp3d, ._, ._ },
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp2d, .memi(.src0d, .tmp1), ._, ._ },
+                        .{ ._, ._, .@"and", .tmp2d, .sa(.dst0, .add_umax), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_unaligned_size), .tmp2d, ._, ._ },
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .scalar_signed_int = .{ .of = .xword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp2), ._, ._ },
+                        .{ ._, .vp_q, .add, .dst0x, .dst0x, .lea(.tmp0x), ._ },
+                        .{ ._, .vp_, .xor, .dst0x, .dst0x, .lea(.tmp0x), ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .scalar_signed_int = .{ .of = .xword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp2), ._, ._ },
+                        .{ ._, .p_q, .add, .dst0x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, .p_, .xor, .dst0x, .lea(.tmp0x), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse, null, null, null },
+                    .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, ._ps, .@"and", .dst0x, .lea(.tmp0x), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{ .{ .scalar_signed_int = .{ .of = .yword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .scalar_signed_int = .{ .of = .yword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp2), ._, ._ },
+                        .{ ._, .vp_q, .add, .dst0y, .dst0y, .lea(.tmp0y), ._ },
+                        .{ ._, .vp_, .xor, .dst0y, .dst0y, .lea(.tmp0y), ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .yword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .scalar_unsigned_int = .{ .of = .yword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_i64, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp4), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp1y, .lea(.tmp0y), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp5), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp2y, .lea(.tmp0y), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_, .@"and", .tmp3y, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._ },
+                        .{ ._, .vp_q, .add, .tmp3y, .tmp3y, .tmp2y, ._ },
+                        .{ ._, .vp_, .xor, .tmp3y, .tmp3y, .tmp2y, ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp3y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp1y, .lea(.tmp0y), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_, .@"and", .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp2y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp4), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp5), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_, .@"and", .tmp3x, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._ },
+                        .{ ._, .vp_q, .add, .tmp3x, .tmp3x, .tmp2x, ._ },
+                        .{ ._, .vp_, .xor, .tmp3x, .tmp3x, .tmp2x, ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp3x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", .vp_, .@"and", .tmp2x, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .dst0 } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp4), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp5), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp2x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp3x, .tmp1x, ._, ._ },
+                        .{ ._, .p_, .@"and", .tmp3x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, .p_q, .add, .tmp3x, .tmp2x, ._, ._ },
+                        .{ ._, .p_, .xor, .tmp3x, .tmp2x, ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp3x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp1x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp2x, .tmp1x, ._, ._ },
+                        .{ ._, .p_, .@"and", .tmp2x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse, null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .kind = .{ .umax_mem = .{ .ref = .dst0, .to_signedness = .unsigned } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ },
+                        .{ ._, ._ps, .mova, .tmp1x, .lea(.tmp0x), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._ps, .mova, .tmp2x, .tmp1x, ._, ._ },
+                        .{ ._, ._ps, .@"and", .tmp2x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp2x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .qword, .is = 64 } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp1q, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }, .any },
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i64, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ },
+                        .{ ._, ._l, .sa, .tmp1q, .uia(64, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._r, .sa, .tmp1q, .uia(64, .dst0, .sub_bit_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp1q, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .bmi2, null, null }, // Requires 64-bit mode and BMI2 (the body uses `bzhi`).
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any }, // Source scalars: unsigned ints constrained `.of = .qword, .is = .qword`.
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }}, // Destination uses the same constraint as the source.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset stepped by the `add` below.
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: bit index for `bzhi` (which reads only the low 8 bits of its index register).
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp2: receives the masked qword.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // `bzhi` and `add` write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 via `.sub_unaligned_size` (presumably minus its byte size; confirm).
+                        .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ }, // tmp1 = immediate derived from dst0 via `.add_bit_size` (presumably the element bit width); set once, outside the loop.
+                        .{ .@"0:", ._, .bzhi, .tmp2q, .memia(.src0q, .tmp0, .add_unaligned_size), .tmp1q, ._ }, // Label 0: tmp2 = src0 qword with all bits at positions >= tmp1 cleared.
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp2q, ._, ._ }, // Store the masked qword to dst0 at the same index.
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // Step tmp0 by one qword (8 bytes).
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the `add` produced no carry.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null }, // Entry for 64-bit targets; no other feature is required.
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any }, // Source scalars: unsigned ints constrained `.of = .qword, .is = .qword`.
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }}, // Destination uses the same constraint as the source.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset stepped by the `add` below.
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp1: mask, then the masked qword.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // `and` and `add` write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 via `.sub_unaligned_size` (presumably minus its byte size; confirm).
+                        .{ .@"0:", ._, .mov, .tmp1q, .ua(.dst0, .add_umax), ._, ._ }, // Label 0: reload tmp1 with an immediate derived from dst0 via `.add_umax` (presumably the element's maximum unsigned value, i.e. the mask).
+                        .{ ._, ._, .@"and", .tmp1q, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ }, // tmp1 &= src0 qword at index tmp0.
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp1q, ._, ._ }, // Store the masked qword to dst0 at the same index.
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // Step tmp0 by one qword (8 bytes).
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the `add` produced no carry.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null }, // Entry for 64-bit targets; no other feature is required.
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .xword } }, .any }, // Source scalars: ints of either signedness constrained `.of = .xword, .is = .xword`.
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .qword, .is = 64 } }}, // Destination scalars: exact-int constraint with `.is = 64`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset stepped by the `add` below.
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp1: the qword being copied.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // `add` writes flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 via `.sub_unaligned_size` (presumably minus its byte size; confirm).
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsia(.src0q, .@"2", .tmp0, .add_unaligned_size), ._, ._ }, // Label 0: load a qword from src0 with tmp0 scaled by 2, so the source presumably advances 16 bytes per 8-byte destination step.
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp1q, ._, ._ }, // Store it unchanged to dst0 (no masking or shifting in this entry).
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // Step tmp0 by one qword (8 bytes).
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the `add` produced no carry.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null }, // Entry for 64-bit targets; no other feature is required.
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any }, // Source scalars: signed ints constrained `.of = .xword, .is = .xword`.
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }}, // Destination scalars: signed ints constrained `.of = .qword, .is = .qword`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset stepped by the `add` below.
+                        .{ .type = .i64, .kind = .{ .rc = .general_purpose } }, // tmp1: the qword currently being processed.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // The shifts and the `add` below write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 via `.sub_unaligned_size` (presumably minus its byte size; confirm).
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsia(.src0q, .@"2", .tmp0, .add_unaligned_size), ._, ._ }, // Label 0: load a qword from src0 with tmp0 scaled by 2 (presumably 16 source bytes per 8 destination bytes).
+                        .{ ._, ._l, .sa, .tmp1q, .uia(64, .dst0, .sub_bit_size), ._, ._ }, // Left shift by a count built from 64 and dst0's `.sub_bit_size` (presumably 64 - bit width).
+                        .{ ._, ._r, .sa, .tmp1q, .uia(64, .dst0, .sub_bit_size), ._, ._ }, // Arithmetic right shift by the same count; together the pair presumably sign-extends from dst0's bit width.
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp1q, ._, ._ }, // Store the qword to dst0 at index tmp0 (unscaled).
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // Step tmp0 by one qword (8 bytes).
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the `add` produced no carry.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .bmi2, null, null }, // Requires 64-bit mode and BMI2 (the body uses `bzhi`).
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any }, // Source scalars: unsigned ints constrained `.of = .xword, .is = .xword`.
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }}, // Destination scalars: unsigned ints constrained `.of = .qword, .is = .qword`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset stepped by the `add` below.
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: bit index for `bzhi` (which reads only the low 8 bits of its index register).
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp2: receives the masked qword.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // `bzhi` and `add` write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 via `.sub_unaligned_size` (presumably minus its byte size; confirm).
+                        .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ }, // tmp1 = immediate derived from dst0 via `.add_bit_size` (presumably the element bit width); set once, outside the loop.
+                        .{ .@"0:", ._, .bzhi, .tmp2q, .memsia(.src0q, .@"2", .tmp0, .add_unaligned_size), .tmp1q, ._ }, // Label 0: tmp2 = src0 qword (tmp0 scaled by 2) with all bits at positions >= tmp1 cleared.
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp2q, ._, ._ }, // Store the masked qword to dst0 at index tmp0 (unscaled).
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // Step tmp0 by one qword (8 bytes).
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the `add` produced no carry.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null }, // Entry for 64-bit targets; no other feature is required.
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any }, // Source scalars: unsigned ints constrained `.of = .xword, .is = .xword`.
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }}, // Destination scalars: unsigned ints constrained `.of = .qword, .is = .qword`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset stepped by the `add` below.
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp1: mask, then the masked qword.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // `and` and `add` write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 via `.sub_unaligned_size` (presumably minus its byte size; confirm).
+                        .{ .@"0:", ._, .mov, .tmp1q, .ua(.dst0, .add_umax), ._, ._ }, // Label 0: reload tmp1 with an immediate derived from dst0 via `.add_umax` (presumably the element's maximum unsigned value, i.e. the mask).
+                        .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"2", .tmp0, .add_unaligned_size), ._, ._ }, // tmp1 &= src0 qword addressed with tmp0 scaled by 2.
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp1q, ._, ._ }, // Store the masked qword to dst0 at index tmp0 (unscaled).
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // Step tmp0 by one qword (8 bytes).
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the `add` produced no carry.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null }, // Entry for 64-bit targets; no other feature is required.
+                    .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .yword } }, .any }, // Source scalars: ints of either signedness constrained `.of = .yword, .is = .yword`.
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .qword, .is = 64 } }}, // Destination scalars: exact-int constraint with `.is = 64`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset stepped by the `add` below.
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp1: the qword being copied.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // `add` writes flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 via `.sub_unaligned_size` (presumably minus its byte size; confirm).
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsia(.src0q, .@"4", .tmp0, .add_unaligned_size), ._, ._ }, // Label 0: load a qword from src0 with tmp0 scaled by 4, so the source presumably advances 32 bytes per 8-byte destination step.
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp1q, ._, ._ }, // Store it unchanged to dst0 (no masking or shifting in this entry).
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // Step tmp0 by one qword (8 bytes).
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the `add` produced no carry.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null }, // Entry for 64-bit targets; no other feature is required.
+                    .src_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .yword } }, .any }, // Source scalars: signed ints constrained `.of = .yword, .is = .yword`.
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }}, // Destination scalars: signed ints constrained `.of = .qword, .is = .qword`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset stepped by the `add` below.
+                        .{ .type = .i64, .kind = .{ .rc = .general_purpose } }, // tmp1: the qword currently being processed.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // The shifts and the `add` below write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 via `.sub_unaligned_size` (presumably minus its byte size; confirm).
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsia(.src0q, .@"4", .tmp0, .add_unaligned_size), ._, ._ }, // Label 0: load a qword from src0 with tmp0 scaled by 4 (presumably 32 source bytes per 8 destination bytes).
+                        .{ ._, ._l, .sa, .tmp1q, .uia(64, .dst0, .sub_bit_size), ._, ._ }, // Left shift by a count built from 64 and dst0's `.sub_bit_size` (presumably 64 - bit width).
+                        .{ ._, ._r, .sa, .tmp1q, .uia(64, .dst0, .sub_bit_size), ._, ._ }, // Arithmetic right shift by the same count; together the pair presumably sign-extends from dst0's bit width.
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp1q, ._, ._ }, // Store the qword to dst0 at index tmp0 (unscaled).
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // Step tmp0 by one qword (8 bytes).
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the `add` produced no carry.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .bmi2, null, null }, // Requires 64-bit mode and BMI2 (the body uses `bzhi`).
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .yword } }, .any }, // Source scalars: unsigned ints constrained `.of = .yword, .is = .yword`.
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }}, // Destination scalars: unsigned ints constrained `.of = .qword, .is = .qword`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset stepped by the `add` below.
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp1: bit index for `bzhi` (which reads only the low 8 bits of its index register).
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp2: receives the masked qword.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // `bzhi` and `add` write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 via `.sub_unaligned_size` (presumably minus its byte size; confirm).
+                        .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ }, // tmp1 = immediate derived from dst0 via `.add_bit_size` (presumably the element bit width); set once, outside the loop.
+                        .{ .@"0:", ._, .bzhi, .tmp2q, .memsia(.src0q, .@"4", .tmp0, .add_unaligned_size), .tmp1q, ._ }, // Label 0: tmp2 = src0 qword (tmp0 scaled by 4) with all bits at positions >= tmp1 cleared.
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp2q, ._, ._ }, // Store the masked qword to dst0 at index tmp0 (unscaled).
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // Step tmp0 by one qword (8 bytes).
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the `add` produced no carry.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null }, // Entry for 64-bit targets; no other feature is required.
+                    .src_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .yword } }, .any }, // Source scalars: unsigned ints constrained `.of = .yword, .is = .yword`.
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }}, // Destination scalars: unsigned ints constrained `.of = .qword, .is = .qword`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: loop offset stepped by the `add` below.
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp1: mask, then the masked qword.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // `and` and `add` write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 via `.sub_unaligned_size` (presumably minus its byte size; confirm).
+                        .{ .@"0:", ._, .mov, .tmp1q, .ua(.dst0, .add_umax), ._, ._ }, // Label 0: reload tmp1 with an immediate derived from dst0 via `.add_umax` (presumably the element's maximum unsigned value, i.e. the mask).
+                        .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"4", .tmp0, .add_unaligned_size), ._, ._ }, // tmp1 &= src0 qword addressed with tmp0 scaled by 4.
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp1q, ._, ._ }, // Store the masked qword to dst0 at index tmp0 (unscaled).
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // Step tmp0 by one qword (8 bytes).
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the `add` produced no carry.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null }, // Entry for 64-bit targets; no other feature is required.
+                    .src_constraints = .{ .any_scalar_int, .any }, // Source: any scalar int; no size constraint, so the source stride is tracked in its own register below.
+                    .dst_constraints = .{.{ .multiple_scalar_exact_int = .{ .of = .qword, .is = 64 } }}, // Destination scalars: exact-int constraint with `.is = 64`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: destination loop offset stepped by the `add` below.
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: source offset, advanced by the `lea` below.
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp2: the qword being copied.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // `xor` and `add` write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 via `.sub_unaligned_size` (presumably minus its byte size; confirm).
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0: start at the beginning of src0.
+                        .{ .@"0:", ._, .mov, .tmp2q, .memi(.src0q, .tmp1), ._, ._ }, // Label 0: load a qword from src0 indexed by tmp1.
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp2q, ._, ._ }, // Store it unchanged to dst0 at index tmp0.
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // tmp1 += displacement from `.add_src0_elem_size` (presumably the source element byte size).
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // Step tmp0 by one qword (8 bytes).
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the `add` produced no carry.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null }, // Entry for 64-bit targets; no other feature is required.
+                    .src_constraints = .{ .any_scalar_signed_int, .any }, // Source: any signed scalar int; no size constraint, so the source stride is tracked in its own register below.
+                    .dst_constraints = .{.{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }}, // Destination scalars: signed ints constrained `.of = .qword, .is = .qword`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: destination loop offset stepped by the `add` below.
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: source offset, advanced by the `lea` below.
+                        .{ .type = .i64, .kind = .{ .rc = .general_purpose } }, // tmp2: the qword currently being processed.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // `xor`, the shifts and `add` write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 via `.sub_unaligned_size` (presumably minus its byte size; confirm).
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0: start at the beginning of src0.
+                        .{ .@"0:", ._, .mov, .tmp2q, .memi(.src0q, .tmp1), ._, ._ }, // Label 0: load a qword from src0 indexed by tmp1.
+                        .{ ._, ._l, .sa, .tmp2q, .uia(64, .dst0, .sub_bit_size), ._, ._ }, // Left shift by a count built from 64 and dst0's `.sub_bit_size` (presumably 64 - bit width).
+                        .{ ._, ._r, .sa, .tmp2q, .uia(64, .dst0, .sub_bit_size), ._, ._ }, // Arithmetic right shift by the same count; together the pair presumably sign-extends from dst0's bit width.
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp2q, ._, ._ }, // Store the qword to dst0 at index tmp0.
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // tmp1 += displacement from `.add_src0_elem_size` (presumably the source element byte size).
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // Step tmp0 by one qword (8 bytes).
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the `add` produced no carry.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .bmi2, null, null }, // Requires 64-bit mode and BMI2 (the body uses `bzhi`).
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any }, // Source: any unsigned scalar int; no size constraint, so the source stride is tracked in its own register below.
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }}, // Destination scalars: unsigned ints constrained `.of = .qword, .is = .qword`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: destination loop offset stepped by the `add` below.
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: source offset, advanced by the `lea` below.
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, // tmp2: bit index for `bzhi` (which reads only the low 8 bits of its index register).
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp3: receives the masked qword.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // `xor`, `bzhi` and `add` write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 via `.sub_unaligned_size` (presumably minus its byte size; confirm).
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0: start at the beginning of src0.
+                        .{ ._, ._, .mov, .tmp2d, .sa(.dst0, .add_bit_size), ._, ._ }, // tmp2 = immediate derived from dst0 via `.add_bit_size` (presumably the element bit width); set once, outside the loop.
+                        .{ .@"0:", ._, .bzhi, .tmp3q, .memi(.src0q, .tmp1), .tmp2q, ._ }, // Label 0: tmp3 = src0 qword at offset tmp1 with all bits at positions >= tmp2 cleared.
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp3q, ._, ._ }, // Store the masked qword to dst0 at index tmp0.
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // tmp1 += displacement from `.add_src0_elem_size` (presumably the source element byte size).
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // Step tmp0 by one qword (8 bytes).
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the `add` produced no carry.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null }, // Entry for 64-bit targets; no other feature is required.
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any }, // Source: any unsigned scalar int; no size constraint, so the source stride is tracked in its own register below.
+                    .dst_constraints = .{.{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }}, // Destination scalars: unsigned ints constrained `.of = .qword, .is = .qword`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, // tmp0: destination loop offset stepped by the `add` below.
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp1: source offset, advanced by the `lea` below.
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, // tmp2: mask, then the masked qword.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // `xor`, `and` and `add` write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.dst0, .sub_unaligned_size), ._, ._ }, // tmp0 = immediate derived from dst0 via `.sub_unaligned_size` (presumably minus its byte size; confirm).
+                        .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, // tmp1 = 0: start at the beginning of src0.
+                        .{ .@"0:", ._, .mov, .tmp2q, .ua(.dst0, .add_umax), ._, ._ }, // Label 0: reload tmp2 with an immediate derived from dst0 via `.add_umax` (presumably the element's maximum unsigned value, i.e. the mask).
+                        .{ ._, ._, .@"and", .tmp2q, .memi(.src0q, .tmp1), ._, ._ }, // tmp2 &= src0 qword at offset tmp1.
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_unaligned_size), .tmp2q, ._, ._ }, // Store the masked qword to dst0 at index tmp0.
+                        .{ ._, ._, .lea, .tmp1d, .leaa(.tmp1, .add_src0_elem_size), ._, ._ }, // tmp1 += displacement from `.add_src0_elem_size` (presumably the source element byte size).
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, // Step tmp0 by one qword (8 bytes).
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the `add` produced no carry.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .slow_incdec, null, null }, // 64-bit entry gated on `slow_incdec`: the element counter below uses `sub` + `ja`.
+                    .src_constraints = .{ .any_scalar_int, .any }, // Source: any scalar int.
+                    .dst_constraints = .{.{ .scalar_exact_remainder_int = .{ .of = .xword, .is = .xword } }}, // Destination scalars: exact-remainder int constraint `.of = .xword, .is = .xword`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp0: remaining-element counter.
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } }, // tmp1: pinned to rsi, the source pointer of `rep movsq`.
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } }, // tmp2: pinned to rdi, the destination pointer of `rep movsq`.
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } }, // tmp3: pinned to ecx, the repeat count of `rep movsq`.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // `add` and `sub` write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ }, // tmp0 = immediate derived from src0 via `.add_len` (presumably the vector length).
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, // rsi = address of src0.
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ }, // rdi = address of dst0.
+                        .{ .@"0:", ._, .mov, .tmp3d, .sa(.dst0, .add_elem_size_div_8), ._, ._ }, // Label 0: ecx = immediate from dst0 via `.add_elem_size_div_8` (presumably qwords per destination element).
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ }, // `rep movsq`: copy ecx qwords from [rsi] to [rdi], advancing both pointers.
+                        .{ ._, ._, .add, .tmp1p, .sa2(.src0, .dst0, .add_delta_elem_size), ._, ._ }, // Advance rsi by an immediate from `.add_delta_elem_size` of (src0, dst0), presumably skipping the source bytes the destination element does not keep.
+                        .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ }, // One element done.
+                        .{ ._, ._a, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the counter is still above zero.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null }, // 64-bit entry with no further feature requirement; the element counter below uses `._c`/`.de` (presumably `dec`) + `jnz`.
+                    .src_constraints = .{ .any_scalar_int, .any }, // Source: any scalar int.
+                    .dst_constraints = .{.{ .scalar_exact_remainder_int = .{ .of = .xword, .is = .xword } }}, // Destination scalars: exact-remainder int constraint `.of = .xword, .is = .xword`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp0: remaining-element counter.
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } }, // tmp1: pinned to rsi, the source pointer of `rep movsq`.
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } }, // tmp2: pinned to rdi, the destination pointer of `rep movsq`.
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } }, // tmp3: pinned to ecx, the repeat count of `rep movsq`.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // `add` and the decrement write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ }, // tmp0 = immediate derived from src0 via `.add_len` (presumably the vector length).
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, // rsi = address of src0.
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ }, // rdi = address of dst0.
+                        .{ .@"0:", ._, .mov, .tmp3d, .sa(.dst0, .add_elem_size_div_8), ._, ._ }, // Label 0: ecx = immediate from dst0 via `.add_elem_size_div_8` (presumably qwords per destination element).
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ }, // `rep movsq`: copy ecx qwords from [rsi] to [rdi], advancing both pointers.
+                        .{ ._, ._, .add, .tmp1p, .sa2(.src0, .dst0, .add_delta_elem_size), ._, ._ }, // Advance rsi by an immediate from `.add_delta_elem_size` of (src0, dst0), presumably skipping the source bytes the destination element does not keep.
+                        .{ ._, ._c, .de, .tmp0d, ._, ._, ._ }, // Decrement the element counter.
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the counter is non-zero.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .slow_incdec, null, null }, // 64-bit entry gated on `slow_incdec`: the element counter below uses `sub` + `ja`.
+                    .src_constraints = .{ .any_scalar_signed_int, .any }, // Source: any signed scalar int.
+                    .dst_constraints = .{.{ .scalar_exact_remainder_signed_int = .{ .of = .xword, .is = .qword } }}, // Destination scalars: signed exact-remainder constraint `.of = .xword, .is = .qword`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp0: remaining-element counter.
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } }, // tmp1: pinned to rsi, the source pointer of `rep movsq`.
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } }, // tmp2: pinned to rdi, the destination pointer of `rep movsq`.
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } }, // tmp3: pinned to ecx as the `rep` count; reused as a 64-bit scratch (`tmp3q`) after the copy.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // `sar`, `add` and `sub` write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ }, // tmp0 = immediate derived from src0 via `.add_len` (presumably the vector length).
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, // rsi = address of src0.
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ }, // rdi = address of dst0.
+                        .{ .@"0:", ._, .mov, .tmp3d, .sia(-2, .dst0, .add_elem_size_div_8), ._, ._ }, // Label 0: ecx = -2 plus dst0's `.add_elem_size_div_8` (presumably all qwords of the element except the last two, which are written by hand below).
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ }, // `rep movsq`: copy ecx qwords from [rsi] to [rdi], advancing both pointers.
+                        .{ ._, ._, .mov, .tmp3q, .lea(.tmp1q), ._, ._ }, // Load the next source qword from [rsi] into the scratch register.
+                        .{ ._, ._, .mov, .lea(.tmp2q), .tmp3q, ._, ._ }, // Store it unchanged at [rdi].
+                        .{ ._, ._r, .sa, .tmp3q, .ui(63), ._, ._ }, // Arithmetic shift right by 63: every bit becomes a copy of the sign bit.
+                        .{ ._, ._, .mov, .lead(.tmp2q, 8), .tmp3q, ._, ._ }, // Store that sign fill at [rdi + 8].
+                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ }, // Advance rsi by 8 plus `.add_delta_elem_size` of (src0, dst0), presumably to the start of the next source element.
+                        .{ ._, ._, .lea, .tmp2p, .lead(.tmp2, 16), ._, ._ }, // Advance rdi past the two hand-written qwords.
+                        .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ }, // One element done.
+                        .{ ._, ._a, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the counter is still above zero.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null }, // 64-bit entry with no further feature requirement; the element counter below uses `._c`/`.de` (presumably `dec`) + `jnz`.
+                    .src_constraints = .{ .any_scalar_signed_int, .any }, // Source: any signed scalar int.
+                    .dst_constraints = .{.{ .scalar_exact_remainder_signed_int = .{ .of = .xword, .is = .qword } }}, // Destination scalars: signed exact-remainder constraint `.of = .xword, .is = .qword`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp0: remaining-element counter.
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } }, // tmp1: pinned to rsi, the source pointer of `rep movsq`.
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } }, // tmp2: pinned to rdi, the destination pointer of `rep movsq`.
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } }, // tmp3: pinned to ecx as the `rep` count; reused as a 64-bit scratch (`tmp3q`) after the copy.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // `sar`, `add` and the decrement write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ }, // tmp0 = immediate derived from src0 via `.add_len` (presumably the vector length).
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, // rsi = address of src0.
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ }, // rdi = address of dst0.
+                        .{ .@"0:", ._, .mov, .tmp3d, .sia(-2, .dst0, .add_elem_size_div_8), ._, ._ }, // Label 0: ecx = -2 plus dst0's `.add_elem_size_div_8` (presumably all qwords of the element except the last two, which are written by hand below).
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ }, // `rep movsq`: copy ecx qwords from [rsi] to [rdi], advancing both pointers.
+                        .{ ._, ._, .mov, .tmp3q, .lea(.tmp1q), ._, ._ }, // Load the next source qword from [rsi] into the scratch register.
+                        .{ ._, ._, .mov, .lea(.tmp2q), .tmp3q, ._, ._ }, // Store it unchanged at [rdi].
+                        .{ ._, ._r, .sa, .tmp3q, .ui(63), ._, ._ }, // Arithmetic shift right by 63: every bit becomes a copy of the sign bit.
+                        .{ ._, ._, .mov, .lead(.tmp2q, 8), .tmp3q, ._, ._ }, // Store that sign fill at [rdi + 8].
+                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ }, // Advance rsi by 8 plus `.add_delta_elem_size` of (src0, dst0), presumably to the start of the next source element.
+                        .{ ._, ._, .lea, .tmp2p, .lead(.tmp2, 16), ._, ._ }, // Advance rdi past the two hand-written qwords.
+                        .{ ._, ._c, .de, .tmp0d, ._, ._, ._ }, // Decrement the element counter.
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the counter is non-zero.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .slow_incdec, null, null }, // 64-bit entry gated on `slow_incdec`: the element counter below uses `sub` + `ja`.
+                    .src_constraints = .{ .any_scalar_signed_int, .any }, // Source: any signed scalar int.
+                    .dst_constraints = .{.{ .scalar_remainder_signed_int = .{ .of = .xword, .is = .qword } }}, // Destination scalars: signed (non-exact) remainder constraint `.of = .xword, .is = .qword`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp0: remaining-element counter.
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } }, // tmp1: pinned to rsi, the source pointer of `rep movsq`.
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } }, // tmp2: pinned to rdi, the destination pointer of `rep movsq`.
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } }, // tmp3: pinned to ecx as the `rep` count; reused as a 64-bit scratch (`tmp3q`) after the copy.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // The shifts, `add` and `sub` write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ }, // tmp0 = immediate derived from src0 via `.add_len` (presumably the vector length).
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, // rsi = address of src0.
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ }, // rdi = address of dst0.
+                        .{ .@"0:", ._, .mov, .tmp3d, .sia(-2, .dst0, .add_elem_size_div_8), ._, ._ }, // Label 0: ecx = -2 plus dst0's `.add_elem_size_div_8` (presumably all qwords of the element except the last two, which are written by hand below).
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ }, // `rep movsq`: copy ecx qwords from [rsi] to [rdi], advancing both pointers.
+                        .{ ._, ._, .mov, .tmp3q, .lea(.tmp1q), ._, ._ }, // Load the next source qword from [rsi] into the scratch register.
+                        .{ ._, ._l, .sa, .tmp3q, .uia(64, .dst0, .sub_bit_size_rem_64), ._, ._ }, // Left shift by a count built from 64 and dst0's `.sub_bit_size_rem_64` (presumably 64 - (bit width mod 64)).
+                        .{ ._, ._r, .sa, .tmp3q, .uia(64, .dst0, .sub_bit_size_rem_64), ._, ._ }, // Arithmetic right shift by the same count; the pair presumably sign-extends the partial top qword.
+                        .{ ._, ._, .mov, .lea(.tmp2q), .tmp3q, ._, ._ }, // Store the sign-extended qword at [rdi].
+                        .{ ._, ._r, .sa, .tmp3q, .ui(63), ._, ._ }, // Arithmetic shift right by 63: every bit becomes a copy of the sign bit.
+                        .{ ._, ._, .mov, .lead(.tmp2q, 8), .tmp3q, ._, ._ }, // Store that sign fill at [rdi + 8].
+                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ }, // Advance rsi by 8 plus `.add_delta_elem_size` of (src0, dst0), presumably to the start of the next source element.
+                        .{ ._, ._, .lea, .tmp2p, .lead(.tmp2, 16), ._, ._ }, // Advance rdi past the two hand-written qwords.
+                        .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ }, // One element done.
+                        .{ ._, ._a, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the counter is still above zero.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null }, // 64-bit entry with no further feature requirement; the element counter below uses `._c`/`.de` (presumably `dec`) + `jnz`.
+                    .src_constraints = .{ .any_scalar_signed_int, .any }, // Source: any signed scalar int.
+                    .dst_constraints = .{.{ .scalar_remainder_signed_int = .{ .of = .xword, .is = .qword } }}, // Destination scalars: signed (non-exact) remainder constraint `.of = .xword, .is = .qword`.
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .none } }, // Single pattern: first operand `.to_mem`, no second operand.
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // tmp0: remaining-element counter.
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } }, // tmp1: pinned to rsi, the source pointer of `rep movsq`.
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } }, // tmp2: pinned to rdi, the destination pointer of `rep movsq`.
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } }, // tmp3: pinned to ecx as the `rep` count; reused as a 64-bit scratch (`tmp3q`) after the copy.
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem}, // The result is produced in memory.
+                    .clobbers = .{ .eflags = true }, // The shifts, `add` and the decrement write flags.
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ }, // tmp0 = immediate derived from src0 via `.add_len` (presumably the vector length).
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, // rsi = address of src0.
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ }, // rdi = address of dst0.
+                        .{ .@"0:", ._, .mov, .tmp3d, .sia(-2, .dst0, .add_elem_size_div_8), ._, ._ }, // Label 0: ecx = -2 plus dst0's `.add_elem_size_div_8` (presumably all qwords of the element except the last two, which are written by hand below).
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ }, // `rep movsq`: copy ecx qwords from [rsi] to [rdi], advancing both pointers.
+                        .{ ._, ._, .mov, .tmp3q, .lea(.tmp1q), ._, ._ }, // Load the next source qword from [rsi] into the scratch register.
+                        .{ ._, ._l, .sa, .tmp3q, .uia(64, .dst0, .sub_bit_size_rem_64), ._, ._ }, // Left shift by a count built from 64 and dst0's `.sub_bit_size_rem_64` (presumably 64 - (bit width mod 64)).
+                        .{ ._, ._r, .sa, .tmp3q, .uia(64, .dst0, .sub_bit_size_rem_64), ._, ._ }, // Arithmetic right shift by the same count; the pair presumably sign-extends the partial top qword.
+                        .{ ._, ._, .mov, .lea(.tmp2q), .tmp3q, ._, ._ }, // Store the sign-extended qword at [rdi].
+                        .{ ._, ._r, .sa, .tmp3q, .ui(63), ._, ._ }, // Arithmetic shift right by 63: every bit becomes a copy of the sign bit.
+                        .{ ._, ._, .mov, .lead(.tmp2q, 8), .tmp3q, ._, ._ }, // Store that sign fill at [rdi + 8].
+                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ }, // Advance rsi by 8 plus `.add_delta_elem_size` of (src0, dst0), presumably to the start of the next source element.
+                        .{ ._, ._, .lea, .tmp2p, .lead(.tmp2, 16), ._, ._ }, // Advance rdi past the two hand-written qwords.
+                        .{ ._, ._c, .de, .tmp0d, ._, ._, ._ }, // Decrement the element counter.
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // Jump back to label 0 while the counter is non-zero.
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .slow_incdec, null, null },
+                    .src_constraints = .{ .any_scalar_signed_int, .any },
+                    .dst_constraints = .{.{ .scalar_remainder_signed_int = .{ .of = .xword, .is = .xword } }},
                     .patterns = &.{
                         .{ .src = .{ .to_mem, .none } },
                     },
@@ -28723,7 +33846,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .usize, .kind = .{ .reg = .rsi } },
                         .{ .type = .usize, .kind = .{ .reg = .rdi } },
                         .{ .type = .u32, .kind = .{ .reg = .ecx } },
-                        .{ .type = .i64, .kind = .{ .reg = .rax } },
+                        .{ .type = .u64, .kind = .{ .reg = .rax } },
                         .unused,
                         .unused,
                         .unused,
@@ -28735,20 +33858,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ },
                         .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
                         .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ },
-                        .{ .@"0:", ._, .mov, .tmp3d, .sia(-1, .src0, .add_elem_size_div_8), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp3d, .sia(-1, .dst0, .add_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
-                        .{ ._, ._sq, .lod, ._, ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp4q, .lea(.tmp1q), ._, ._ },
+                        .{ ._, ._l, .sa, .tmp4q, .uia(64, .dst0, .sub_bit_size_rem_64), ._, ._ },
+                        .{ ._, ._r, .sa, .tmp4q, .uia(64, .dst0, .sub_bit_size_rem_64), ._, ._ },
                         .{ ._, ._sq, .sto, ._, ._, ._, ._ },
-                        .{ ._, ._r, .sa, .tmp4q, .ui(63), ._, ._ },
-                        .{ ._, ._, .mov, .tmp3d, .sa2(.dst0, .src0, .add_delta_elem_size_div_8), ._, ._ },
-                        .{ ._, .@"rep _sq", .sto, ._, ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ },
                         .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
                         .{ ._, ._a, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
                     .required_features = .{ .@"64bit", null, null, null },
                     .src_constraints = .{ .any_scalar_signed_int, .any },
-                    .dst_constraints = .{.any_scalar_signed_int},
+                    .dst_constraints = .{.{ .scalar_remainder_signed_int = .{ .of = .xword, .is = .xword } }},
                     .patterns = &.{
                         .{ .src = .{ .to_mem, .none } },
                     },
@@ -28757,7 +33880,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .usize, .kind = .{ .reg = .rsi } },
                         .{ .type = .usize, .kind = .{ .reg = .rdi } },
                         .{ .type = .u32, .kind = .{ .reg = .ecx } },
-                        .{ .type = .i64, .kind = .{ .reg = .rax } },
+                        .{ .type = .u64, .kind = .{ .reg = .rax } },
                         .unused,
                         .unused,
                         .unused,
@@ -28769,20 +33892,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ },
                         .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
                         .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ },
-                        .{ .@"0:", ._, .mov, .tmp3d, .sia(-1, .src0, .add_elem_size_div_8), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp3d, .sia(-1, .dst0, .add_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
-                        .{ ._, ._sq, .lod, ._, ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp4q, .lea(.tmp1q), ._, ._ },
+                        .{ ._, ._l, .sa, .tmp4q, .uia(64, .dst0, .sub_bit_size_rem_64), ._, ._ },
+                        .{ ._, ._r, .sa, .tmp4q, .uia(64, .dst0, .sub_bit_size_rem_64), ._, ._ },
                         .{ ._, ._sq, .sto, ._, ._, ._, ._ },
-                        .{ ._, ._r, .sa, .tmp4q, .ui(63), ._, ._ },
-                        .{ ._, ._, .mov, .tmp3d, .sa2(.dst0, .src0, .add_delta_elem_size_div_8), ._, ._ },
-                        .{ ._, .@"rep _sq", .sto, ._, ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ },
                         .{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
                     .required_features = .{ .@"64bit", .slow_incdec, null, null },
-                    .src_constraints = .{ .any_scalar_int, .any },
-                    .dst_constraints = .{.any_scalar_int},
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any },
+                    .dst_constraints = .{.{ .scalar_exact_remainder_unsigned_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
                         .{ .src = .{ .to_mem, .none } },
                     },
@@ -28790,8 +33913,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .usize, .kind = .{ .reg = .rsi } },
                         .{ .type = .usize, .kind = .{ .reg = .rdi } },
-                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
                         .{ .type = .u64, .kind = .{ .reg = .rax } },
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
                         .unused,
                         .unused,
                         .unused,
@@ -28803,18 +33926,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ },
                         .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
                         .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ },
-                        .{ .@"0:", ._, .mov, .tmp3d, .sa(.src0, .add_elem_size_div_8), ._, ._ },
+                        .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp4d, .sia(-1, .dst0, .add_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
-                        .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
-                        .{ ._, ._, .mov, .tmp3d, .sa2(.dst0, .src0, .add_delta_elem_size_div_8), ._, ._ },
-                        .{ ._, .@"rep _sq", .sto, ._, ._, ._, ._ },
+                        .{ ._, ._sq, .sto, ._, ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sa2(.src0, .dst0, .add_delta_elem_size), ._, ._ },
                         .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
                         .{ ._, ._a, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
                     .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .any_scalar_int, .any },
-                    .dst_constraints = .{.any_scalar_int},
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any },
+                    .dst_constraints = .{.{ .scalar_exact_remainder_unsigned_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
                         .{ .src = .{ .to_mem, .none } },
                     },
@@ -28822,8 +33945,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .usize, .kind = .{ .reg = .rsi } },
                         .{ .type = .usize, .kind = .{ .reg = .rdi } },
-                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
                         .{ .type = .u64, .kind = .{ .reg = .rax } },
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
                         .unused,
                         .unused,
                         .unused,
@@ -28835,368 +33958,98 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ },
                         .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
                         .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ },
-                        .{ .@"0:", ._, .mov, .tmp3d, .sa(.src0, .add_elem_size_div_8), ._, ._ },
+                        .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp4d, .sia(-1, .dst0, .add_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
-                        .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
-                        .{ ._, ._, .mov, .tmp3d, .sa2(.dst0, .src0, .add_delta_elem_size_div_8), ._, ._ },
-                        .{ ._, .@"rep _sq", .sto, ._, ._, ._, ._ },
+                        .{ ._, ._sq, .sto, ._, ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sa2(.src0, .dst0, .add_delta_elem_size), ._, ._ },
                         .{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
                         .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
-                } }) catch |err| switch (err) {
-                    error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
-                        @tagName(air_tag),
-                        dst_ty.fmt(pt),
-                        src_ty.fmt(pt),
-                        ops[0].tracking(cg),
-                    }),
-                    else => |e| return e,
-                };
-                try res[0].finish(inst, &.{ty_op.operand}, &ops, cg);
-            },
-            .trunc => |air_tag| if (use_old) try cg.airTrunc(inst) else fallback: {
-                const ty_op = air_datas[@intFromEnum(inst)].ty_op;
-                if (ty_op.ty.toType().isVector(zcu)) break :fallback try cg.airTrunc(inst);
-                var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
-                var res: [1]Temp = undefined;
-                cg.select(&res, &.{ty_op.ty.toType()}, &ops, comptime &.{ .{
-                    .src_constraints = .{ .{ .signed_int = .gpr }, .any },
-                    .dst_constraints = .{.{ .exact_signed_int = 1 }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mut_gpr, .none } },
-                    },
-                    .dst_temps = .{.{ .ref = .src0 }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .@"and", .dst0d, .si(1), ._, ._ },
-                        .{ ._, ._, .neg, .dst0d, ._, ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .any_signed_int, .any },
-                    .dst_constraints = .{.{ .exact_signed_int = 1 }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .none } },
-                    },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .movzx, .dst0d, .mem(.src0b), ._, ._ },
-                        .{ ._, ._, .@"and", .dst0d, .si(1), ._, ._ },
-                        .{ ._, ._, .neg, .dst0d, ._, ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .int = .gpr }, .any },
-                    .dst_constraints = .{.{ .exact_int = 8 }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mut_gpr, .none } },
-                    },
-                    .dst_temps = .{.{ .ref = .src0 }},
-                    .each = .{ .once = &.{} },
-                }, .{
-                    .src_constraints = .{ .any_signed_int, .any },
-                    .dst_constraints = .{.{ .exact_signed_int = 8 }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .none } },
-                    },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .movsx, .dst0d, .mem(.src0b), ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .any_unsigned_int, .any },
-                    .dst_constraints = .{.{ .exact_unsigned_int = 8 }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .none } },
-                    },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .movzx, .dst0d, .mem(.src0b), ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .signed_int = .gpr }, .any },
-                    .dst_constraints = .{.{ .signed_int = .byte }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mut_gpr, .none } },
-                    },
-                    .dst_temps = .{.{ .ref = .src0 }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._l, .sa, .dst0b, .uia(8, .dst0, .sub_bit_size), ._, ._ },
-                        .{ ._, ._r, .sa, .dst0b, .uia(8, .dst0, .sub_bit_size), ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .unsigned_int = .gpr }, .any },
-                    .dst_constraints = .{.{ .unsigned_int = .byte }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mut_gpr, .none } },
-                    },
-                    .dst_temps = .{.{ .ref = .src0 }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .@"and", .dst0b, .sa(.dst0, .add_umax), ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .any_int, .any },
-                    .dst_constraints = .{.{ .unsigned_int = .byte }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .none } },
-                    },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .movzx, .dst0d, .mem(.src0b), ._, ._ },
-                        .{ ._, ._, .@"and", .dst0b, .sa(.dst0, .add_umax), ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .int = .gpr }, .any },
-                    .dst_constraints = .{.{ .exact_int = 16 }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mut_gpr, .none } },
-                    },
-                    .dst_temps = .{.{ .ref = .src0 }},
-                    .each = .{ .once = &.{} },
-                }, .{
-                    .src_constraints = .{ .any_signed_int, .any },
-                    .dst_constraints = .{.{ .exact_signed_int = 16 }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .none } },
-                    },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .movsx, .dst0d, .mem(.src0w), ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .any_unsigned_int, .any },
-                    .dst_constraints = .{.{ .exact_unsigned_int = 16 }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .none } },
-                    },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .movzx, .dst0d, .mem(.src0w), ._, ._ },
-                    } },
-                }, .{
-                    .required_features = .{ .fast_imm16, null, null, null },
-                    .src_constraints = .{ .{ .unsigned_int = .gpr }, .any },
-                    .dst_constraints = .{.{ .unsigned_int = .word }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mut_gpr, .none } },
-                    },
-                    .dst_temps = .{.{ .ref = .src0 }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .@"and", .dst0w, .sa(.dst0, .add_umax), ._, ._ },
-                    } },
-                }, .{
-                    .required_features = .{ .fast_imm16, null, null, null },
-                    .src_constraints = .{ .any_unsigned_int, .any },
-                    .dst_constraints = .{.{ .unsigned_int = .word }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .none } },
-                    },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .movzx, .dst0d, .mem(.src0w), ._, ._ },
-                        .{ ._, ._, .@"and", .dst0w, .sa(.dst0, .add_umax), ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .int = .gpr }, .any },
-                    .dst_constraints = .{.{ .exact_int = 32 }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mut_gpr, .none } },
-                    },
-                    .dst_temps = .{.{ .ref = .src0 }},
-                    .each = .{ .once = &.{} },
-                }, .{
-                    .src_constraints = .{ .any_int, .any },
-                    .dst_constraints = .{.{ .exact_int = 32 }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .none } },
-                    },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .dst0d, .mem(.src0d), ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .signed_int = .gpr }, .any },
-                    .dst_constraints = .{.{ .signed_int = .dword }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mut_gpr, .none } },
-                    },
-                    .dst_temps = .{.{ .ref = .src0 }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._l, .sa, .dst0d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
-                        .{ ._, ._r, .sa, .dst0d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .any_signed_int, .any },
-                    .dst_constraints = .{.{ .signed_int = .dword }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .none } },
-                    },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .dst0d, .mem(.src0d), ._, ._ },
-                        .{ ._, ._l, .sa, .dst0d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
-                        .{ ._, ._r, .sa, .dst0d, .uia(32, .dst0, .sub_bit_size), ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .{ .unsigned_int = .gpr }, .any },
-                    .dst_constraints = .{.{ .unsigned_int = .dword }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mut_gpr, .none } },
-                    },
-                    .dst_temps = .{.{ .ref = .src0 }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .@"and", .dst0d, .sa(.dst0, .add_umax), ._, ._ },
-                    } },
-                }, .{
-                    .src_constraints = .{ .any_unsigned_int, .any },
-                    .dst_constraints = .{.{ .unsigned_int = .dword }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .none } },
-                    },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .dst0d, .mem(.src0d), ._, ._ },
-                        .{ ._, ._, .@"and", .dst0d, .sa(.dst0, .add_umax), ._, ._ },
-                    } },
-                }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .any_int, .any },
-                    .dst_constraints = .{.{ .exact_int = 64 }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .none } },
-                    },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .dst0q, .mem(.src0q), ._, ._ },
-                    } },
-                }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .{ .signed_int = .gpr }, .any },
-                    .dst_constraints = .{.{ .signed_int = .qword }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mut_gpr, .none } },
-                    },
-                    .dst_temps = .{.{ .ref = .src0 }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._l, .sa, .dst0q, .uia(64, .dst0, .sub_bit_size), ._, ._ },
-                        .{ ._, ._r, .sa, .dst0q, .uia(64, .dst0, .sub_bit_size), ._, ._ },
-                    } },
                 }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .any_signed_int, .any },
-                    .dst_constraints = .{.{ .signed_int = .qword }},
+                    .required_features = .{ .@"64bit", .bmi2, .slow_incdec, null },
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any },
+                    .dst_constraints = .{.{ .scalar_remainder_unsigned_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
                         .{ .src = .{ .to_mem, .none } },
                     },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .dst0q, .mem(.src0q), ._, ._ },
-                        .{ ._, ._l, .sa, .dst0q, .uia(64, .dst0, .sub_bit_size), ._, ._ },
-                        .{ ._, ._r, .sa, .dst0q, .uia(64, .dst0, .sub_bit_size), ._, ._ },
-                    } },
-                }, .{
-                    .required_features = .{ .@"64bit", .bmi2, null, null },
-                    .src_constraints = .{ .{ .unsigned_int = .gpr }, .any },
-                    .dst_constraints = .{.{ .unsigned_int = .qword }},
-                    .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
-                        .{ .src = .{ .to_gpr, .none } },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .usize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .reg = .ecx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .dst0d, .sa(.dst0, .add_bit_size), ._, ._ },
-                        .{ ._, ._, .bzhi, .dst0q, .src0q, .dst0q, ._ },
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp3d, .sa(.dst0, .add_bit_size_rem_64), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp4d, .sia(-2, .dst0, .add_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .bzhi, .tmp4q, .lea(.tmp1q), .tmp3q, ._ },
+                        .{ ._, ._, .mov, .lea(.tmp2q), .tmp4q, ._, ._ },
+                        .{ ._, ._, .mov, .lead(.tmp2q, 8), .si(0), ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .lead(.tmp2, 16), ._, ._ },
+                        .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
+                        .{ ._, ._a, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
                     .required_features = .{ .@"64bit", .bmi2, null, null },
-                    .src_constraints = .{ .any_unsigned_int, .any },
-                    .dst_constraints = .{.{ .unsigned_int = .qword }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .none } },
-                    },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .dst0d, .sa(.dst0, .add_bit_size), ._, ._ },
-                        .{ ._, ._, .bzhi, .dst0q, .mem(.src0q), .dst0q, ._ },
-                    } },
-                }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .{ .unsigned_int = .gpr }, .any },
-                    .dst_constraints = .{.{ .unsigned_int = .qword }},
-                    .patterns = &.{
-                        .{ .src = .{ .mem, .none } },
-                        .{ .src = .{ .to_gpr, .none } },
-                    },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .dst0q, .ua(.dst0, .add_umax), ._, ._ },
-                        .{ ._, ._, .@"and", .dst0q, .src0q, ._, ._ },
-                    } },
-                }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .any_unsigned_int, .any },
-                    .dst_constraints = .{.{ .unsigned_int = .qword }},
-                    .patterns = &.{
-                        .{ .src = .{ .to_mem, .none } },
-                    },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
-                    .clobbers = .{ .eflags = true },
-                    .each = .{ .once = &.{
-                        .{ ._, ._, .mov, .dst0q, .ua(.dst0, .add_umax), ._, ._ },
-                        .{ ._, ._, .@"and", .dst0q, .mem(.src0q), ._, ._ },
-                    } },
-                }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .any_int, .any },
-                    .dst_constraints = .{.{ .exact_remainder_int = .{ .of = .xword, .is = .xword } }},
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any },
+                    .dst_constraints = .{.{ .scalar_remainder_unsigned_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
                         .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .usize, .kind = .{ .reg = .rsi } },
                         .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .u32, .kind = .{ .reg = .ecx } },
                         .unused,
                         .unused,
                         .unused,
                         .unused,
-                        .unused,
-                        .unused,
                     },
                     .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
-                        .{ ._, ._, .mov, .tmp2d, .sa(.dst0, .add_size_div_8), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp3d, .sa(.dst0, .add_bit_size_rem_64), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp4d, .sia(-2, .dst0, .add_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .bzhi, .tmp4q, .lea(.tmp1q), .tmp3q, ._ },
+                        .{ ._, ._, .mov, .lea(.tmp2q), .tmp4q, ._, ._ },
+                        .{ ._, ._, .mov, .lead(.tmp2q, 8), .si(0), ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .lead(.tmp2, 16), ._, ._ },
+                        .{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .any_signed_int, .any },
-                    .dst_constraints = .{.{ .exact_remainder_signed_int = .{ .of = .xword, .is = .qword } }},
+                    .required_features = .{ .@"64bit", .bmi2, .slow_incdec, null },
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any },
+                    .dst_constraints = .{.{ .scalar_remainder_unsigned_int = .{ .of = .xword, .is = .xword } }},
                     .patterns = &.{
                         .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .usize, .kind = .{ .reg = .rsi } },
                         .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .u32, .kind = .{ .reg = .ecx } },
-                        .unused,
-                        .unused,
-                        .unused,
+                        .{ .type = .u64, .kind = .{ .reg = .rax } },
                         .unused,
                         .unused,
                         .unused,
@@ -29204,29 +34057,32 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
-                        .{ ._, ._, .mov, .tmp2d, .sia(-2, .dst0, .add_size_div_8), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp3d, .sa(.dst0, .add_bit_size_rem_64), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp4d, .sia(-1, .dst0, .add_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
-                        .{ ._, ._, .mov, .tmp0q, .memad(.src0q, .add_size, -16), ._, ._ },
-                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ },
-                        .{ ._, ._r, .sa, .tmp0q, .ui(63), ._, ._ },
-                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ },
+                        .{ ._, ._, .bzhi, .tmp5q, .lea(.tmp1q), .tmp3q, ._ },
+                        .{ ._, ._sq, .sto, ._, ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ },
+                        .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
+                        .{ ._, ._a, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .any_signed_int, .any },
-                    .dst_constraints = .{.{ .remainder_signed_int = .{ .of = .xword, .is = .qword } }},
+                    .required_features = .{ .@"64bit", .bmi2, null, null },
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any },
+                    .dst_constraints = .{.{ .scalar_remainder_unsigned_int = .{ .of = .xword, .is = .xword } }},
                     .patterns = &.{
                         .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .usize, .kind = .{ .reg = .rsi } },
                         .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .u32, .kind = .{ .reg = .ecx } },
-                        .unused,
-                        .unused,
-                        .unused,
+                        .{ .type = .u64, .kind = .{ .reg = .rax } },
                         .unused,
                         .unused,
                         .unused,
@@ -29234,25 +34090,27 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
-                        .{ ._, ._, .mov, .tmp2d, .sia(-2, .dst0, .add_size_div_8), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp3d, .sa(.dst0, .add_bit_size_rem_64), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp4d, .sia(-1, .dst0, .add_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
-                        .{ ._, ._, .mov, .tmp0q, .memad(.src0q, .add_size, -16), ._, ._ },
-                        .{ ._, ._l, .sa, .tmp0q, .uia(64, .dst0, .sub_bit_size_rem_64), ._, ._ },
-                        .{ ._, ._r, .sa, .tmp0q, .uia(64, .dst0, .sub_bit_size_rem_64), ._, ._ },
-                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ },
-                        .{ ._, ._r, .sa, .tmp0q, .ui(63), ._, ._ },
-                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ },
+                        .{ ._, ._, .bzhi, .tmp5q, .lea(.tmp1q), .tmp3q, ._ },
+                        .{ ._, ._sq, .sto, ._, ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ },
+                        .{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .any_signed_int, .any },
-                    .dst_constraints = .{.{ .remainder_signed_int = .{ .of = .xword, .is = .xword } }},
+                    .required_features = .{ .@"64bit", .slow_incdec, null, null },
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any },
+                    .dst_constraints = .{.{ .scalar_remainder_unsigned_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
                         .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .usize, .kind = .{ .reg = .rsi } },
                         .{ .type = .usize, .kind = .{ .reg = .rdi } },
                         .{ .type = .u32, .kind = .{ .reg = .ecx } },
@@ -29261,28 +34119,33 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
-                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
-                        .{ ._, ._, .mov, .tmp2d, .sia(-1, .dst0, .add_size_div_8), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp3d, .sia(-2, .dst0, .add_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
-                        .{ ._, ._, .mov, .tmp0q, .memad(.src0q, .add_size, -8), ._, ._ },
-                        .{ ._, ._l, .sa, .tmp0q, .uia(64, .dst0, .sub_bit_size_rem_64), ._, ._ },
-                        .{ ._, ._r, .sa, .tmp0q, .uia(64, .dst0, .sub_bit_size_rem_64), ._, ._ },
-                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ },
+                        .{ ._, ._, .mov, .tmp3q, .ua(.dst0, .add_umax), ._, ._ },
+                        .{ ._, ._, .@"and", .tmp3q, .lea(.tmp1q), ._, ._ },
+                        .{ ._, ._, .mov, .lea(.tmp2q), .tmp3q, ._, ._ },
+                        .{ ._, ._, .mov, .lead(.tmp2q, 8), .si(0), ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .lead(.tmp2, 16), ._, ._ },
+                        .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
+                        .{ ._, ._a, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
                     .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .any_unsigned_int, .any },
-                    .dst_constraints = .{.{ .exact_remainder_unsigned_int = .{ .of = .xword, .is = .qword } }},
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any },
+                    .dst_constraints = .{.{ .scalar_remainder_unsigned_int = .{ .of = .xword, .is = .qword } }},
                     .patterns = &.{
                         .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
                         .{ .type = .usize, .kind = .{ .reg = .rsi } },
                         .{ .type = .usize, .kind = .{ .reg = .rdi } },
                         .{ .type = .u32, .kind = .{ .reg = .ecx } },
@@ -29291,75 +34154,91 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .unused,
                         .unused,
                         .unused,
-                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
-                        .{ ._, ._, .mov, .tmp2d, .sia(-1, .dst0, .add_size_div_8), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp3d, .sia(-2, .dst0, .add_elem_size_div_8), ._, ._ },
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
-                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp3q, .ua(.dst0, .add_umax), ._, ._ },
+                        .{ ._, ._, .@"and", .tmp3q, .lea(.tmp1q), ._, ._ },
+                        .{ ._, ._, .mov, .lea(.tmp2q), .tmp3q, ._, ._ },
+                        .{ ._, ._, .mov, .lead(.tmp2q, 8), .si(0), ._, ._ },
+                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .lead(.tmp2, 16), ._, ._ },
+                        .{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .any_unsigned_int, .any },
-                    .dst_constraints = .{.{ .remainder_unsigned_int = .{ .of = .xword, .is = .qword } }},
+                    .required_features = .{ .@"64bit", .slow_incdec, null, null }, // slow_incdec variant: the loop counter below is stepped with `sub`
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any },
+                    .dst_constraints = .{.{ .scalar_remainder_unsigned_int = .{ .of = .xword, .is = .xword } }}, // unsigned scalar ints, remainder measured per xword
                     .patterns = &.{
                         .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // presumably tmp0 (tmpN = extra_temps[N] — confirm); used as the loop counter
                         .{ .type = .usize, .kind = .{ .reg = .rsi } },
                         .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rax } }, // presumably tmp3, pinned to rax
                         .{ .type = .u32, .kind = .{ .reg = .ecx } },
                         .unused,
                         .unused,
                         .unused,
                         .unused,
-                        .unused,
-                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
-                        .{ ._, ._, .mov, .tmp2d, .sia(-2, .dst0, .add_size_div_8), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ }, // tmp0 = src0 `.add_len` adjust; presumably the number of vector elements left
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, // tmp1 = address of src0 (read cursor)
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ }, // tmp2 = address of dst0 (write cursor)
+                        .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, // zero tmp3
+                        .{ .@"0:", ._, .mov, .tmp4d, .sia(-1, .dst0, .add_elem_size_div_8), ._, ._ }, // label 0: tmp4 = -1 + dst0 `.add_elem_size_div_8` adjust; presumably the qword count for the rep move
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
-                        .{ ._, ._, .mov, .tmp0q, .ua(.dst0, .add_umax), ._, ._ },
-                        .{ ._, ._, .@"and", .tmp0q, .memad(.src0q, .add_size, -16), ._, ._ },
-                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ },
-                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp4q, .ua(.dst0, .add_umax), ._, ._ }, // tmp4 = dst0 `.add_umax` adjust; presumably the mask for the final qword
+                        .{ ._, ._, .@"and", .tmp4q, .lea(.tmp1q), ._, ._ }, // tmp4 &= qword at [tmp1]
+                        .{ ._, ._sq, .sto, ._, ._, ._, ._ }, // NOTE(review): presumably stosq, which stores rax (tmp3, zeroed above) — the masked value in tmp4 looks unused; confirm
+                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ }, // advance tmp1 by 8 plus the src0/dst0 `.add_delta_elem_size` adjust
+                        .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ }, // tmp0 -= 1
+                        .{ ._, ._a, .j, .@"0b", ._, ._, ._ }, // presumably `ja`: jump back to label 0 while the counter is still nonzero
                     } },
                 }, .{
                     .required_features = .{ .@"64bit", null, null, null },
-                    .src_constraints = .{ .any_unsigned_int, .any },
-                    .dst_constraints = .{.{ .remainder_unsigned_int = .{ .of = .xword, .is = .xword } }},
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any },
+                    .dst_constraints = .{.{ .scalar_remainder_unsigned_int = .{ .of = .xword, .is = .xword } }}, // unsigned scalar ints, remainder measured per xword
                     .patterns = &.{
                         .{ .src = .{ .to_mem, .none } },
                     },
                     .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, // presumably tmp0 (tmpN = extra_temps[N] — confirm); used as the loop counter
                         .{ .type = .usize, .kind = .{ .reg = .rsi } },
                         .{ .type = .usize, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rax } }, // presumably tmp3, pinned to rax
                         .{ .type = .u32, .kind = .{ .reg = .ecx } },
                         .unused,
                         .unused,
                         .unused,
                         .unused,
-                        .unused,
-                        .unused,
                     },
                     .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
-                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
-                        .{ ._, ._, .mov, .tmp2d, .sia(-1, .dst0, .add_size_div_8), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_len), ._, ._ }, // tmp0 = src0 `.add_len` adjust; presumably the number of vector elements left
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, // tmp1 = address of src0 (read cursor)
+                        .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ }, // tmp2 = address of dst0 (write cursor)
+                        .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, // zero tmp3
+                        .{ .@"0:", ._, .mov, .tmp4d, .sia(-1, .dst0, .add_elem_size_div_8), ._, ._ }, // label 0: tmp4 = -1 + dst0 `.add_elem_size_div_8` adjust; presumably the qword count for the rep move
                         .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
-                        .{ ._, ._, .mov, .tmp0q, .ua(.dst0, .add_umax), ._, ._ },
-                        .{ ._, ._, .@"and", .tmp0q, .memad(.src0q, .add_size, -8), ._, ._ },
-                        .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ },
+                        .{ ._, ._, .mov, .tmp4q, .ua(.dst0, .add_umax), ._, ._ }, // tmp4 = dst0 `.add_umax` adjust; presumably the mask for the final qword
+                        .{ ._, ._, .@"and", .tmp4q, .lea(.tmp1q), ._, ._ }, // tmp4 &= qword at [tmp1]
+                        .{ ._, ._sq, .sto, ._, ._, ._, ._ }, // NOTE(review): presumably stosq, which stores rax (tmp3, zeroed above) — the masked value in tmp4 looks unused; confirm
+                        .{ ._, ._, .add, .tmp1p, .sia2(8, .src0, .dst0, .add_delta_elem_size), ._, ._ }, // advance tmp1 by 8 plus the src0/dst0 `.add_delta_elem_size` adjust
+                        .{ ._, ._c, .de, .tmp0d, ._, ._, ._ }, // presumably `dec`: tmp0 -= 1
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, // presumably `jnz`: jump back to label 0 while the counter is nonzero
                     } },
                 } }) catch |err| switch (err) {
                     error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
@@ -50265,7 +55144,15 @@ const Select = struct {
         multiple_scalar_int: OfIsSizes,
         multiple_scalar_signed_int: OfIsSizes,
         multiple_scalar_unsigned_int: OfIsSizes,
+        multiple_scalar_exact_int: struct { of: Memory.Size, is: u16 }, // ABI size is a multiple of `of` bytes and the scalar int has exactly `is` bits
+        multiple_scalar_exact_signed_int: struct { of: Memory.Size, is: u16 }, // same test, signed scalars only
+        multiple_scalar_exact_unsigned_int: struct { of: Memory.Size, is: u16 }, // same test, unsigned scalars only
         scalar_remainder_int: OfIsSizes,
+        scalar_remainder_signed_int: OfIsSizes, // signed scalar whose (bits - 1) % of + 1 fits within `is`
+        scalar_remainder_unsigned_int: OfIsSizes, // unsigned scalar whose (bits - 1) % of + 1 fits within `is`
+        scalar_exact_remainder_int: OfIsSizes, // scalar int whose (bits - 1) % of + 1 equals `is` exactly
+        scalar_exact_remainder_signed_int: OfIsSizes, // exact-remainder test, signed scalars only
+        scalar_exact_remainder_unsigned_int: OfIsSizes, // exact-remainder test, unsigned scalars only
         float: Memory.Size,
         scalar_any_float: Memory.Size,
         scalar_float: OfIsSizes,
@@ -50352,10 +55239,30 @@ const Select = struct {
                 .multiple_scalar_unsigned_int => |of_is| ty.abiSize(zcu) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and
                     if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .unsigned and
                     of_is.is.bitSize(cg.target) >= int_info.bits else false,
+                .multiple_scalar_exact_int => |of_is| ty.abiSize(zcu) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and
+                    if (cg.intInfo(ty.scalarType(zcu))) |int_info| of_is.is == int_info.bits else false, // scalar must be an int of exactly `is` bits, either signedness
+                .multiple_scalar_exact_signed_int => |of_is| ty.abiSize(zcu) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and
+                    if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .signed and
+                    of_is.is == int_info.bits else false, // signed int of exactly `is` bits; non-int scalars never match
+                .multiple_scalar_exact_unsigned_int => |of_is| ty.abiSize(zcu) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and
+                    if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .unsigned and
+                    of_is.is == int_info.bits else false, // unsigned int of exactly `is` bits; non-int scalars never match
                 .scalar_remainder_int => |of_is| if (cg.intInfo(ty.scalarType(zcu))) |int_info|
                     of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1
                 else
                     false,
+                .scalar_remainder_signed_int => |of_is| if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .signed and
+                    of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1 else false, // bits occupying the last `of`-sized chunk must fit in `is`
+                .scalar_remainder_unsigned_int => |of_is| if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .unsigned and
+                    of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1 else false, // bits occupying the last `of`-sized chunk must fit in `is`
+                .scalar_exact_remainder_int => |of_is| if (cg.intInfo(ty.scalarType(zcu))) |int_info|
+                    of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1 // bits occupying the last `of`-sized chunk must equal `is` exactly
+                else
+                    false,
+                .scalar_exact_remainder_signed_int => |of_is| if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .signed and
+                    of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1 else false, // exact match on the last-chunk bit count, signed only
+                .scalar_exact_remainder_unsigned_int => |of_is| if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .unsigned and
+                    of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1 else false, // exact match on the last-chunk bit count, unsigned only
                 .float => |size| if (cg.floatBits(ty)) |float_bits| size.bitSize(cg.target) == float_bits else false,
                 .scalar_any_float => |size| @divExact(size.bitSize(cg.target), 8) >= ty.abiSize(zcu) and
                     cg.floatBits(ty.scalarType(zcu)) != null,
@@ -50629,7 +55536,11 @@ const Select = struct {
             frame: FrameIndex,
             symbol: *const struct { lib: ?[]const u8 = null, name: []const u8 },
 
-            const ConstInfo = struct { ref: Select.Operand.Ref, vectorize_to: ?Memory.Size = null };
+            const ConstInfo = struct {
+                ref: Select.Operand.Ref,
+                to_signedness: ?std.builtin.Signedness = null, // when set, replaces the scalar's own signedness for the generated constant; null keeps it
+                vectorize_to: ?Memory.Size = null,
+            };
 
             fn finish(kind: Kind, temp: Temp, s: *const Select) void {
                 switch (kind) {
@@ -50704,7 +55615,8 @@ const Select = struct {
                                 .signedness = .signed,
                                 .bits = cg.floatBits(scalar_ty).?,
                             };
-                            const scalar_int_ty = try pt.intType(scalar_info.signedness, scalar_info.bits);
+                            const scalar_signedness = const_info.to_signedness orelse scalar_info.signedness;
+                            const scalar_int_ty = try pt.intType(scalar_signedness, scalar_info.bits);
                             if (scalar_info.bits <= 64) {
                                 const int_val: i64 = switch (spec.kind) {
                                     else => unreachable,
@@ -50714,7 +55626,7 @@ const Select = struct {
                                     .umax_mem => -1,
                                 };
                                 const shift: u6 = @intCast(64 - scalar_info.bits);
-                                break :res_scalar .{ scalar_int_ty, switch (scalar_info.signedness) {
+                                break :res_scalar .{ scalar_int_ty, switch (scalar_signedness) {
                                     .signed => try pt.intValue_i64(scalar_int_ty, int_val >> shift),
                                     .unsigned => try pt.intValue_u64(scalar_int_ty, @as(u64, @bitCast(int_val)) >> shift),
                                 } };
@@ -50730,7 +55642,7 @@ const Select = struct {
                                 .smin_mem, .smax_mem => .signed,
                                 .umin_mem, .umax_mem => .unsigned,
                             }, scalar_info.bits);
-                            try big_int.truncate(&big_int, scalar_info.signedness, scalar_info.bits);
+                            try big_int.truncate(&big_int, scalar_signedness, scalar_info.bits);
                             break :res_scalar .{ scalar_int_ty, try pt.intValue_big(scalar_int_ty, big_int.toConst()) };
                         },
                     };
@@ -50832,7 +55744,8 @@ const Select = struct {
             const sub_unaligned_size: Adjust = .{ .sign = .neg, .lhs = .unaligned_size, .op = .mul, .rhs = .@"1" };
             const add_2_bit_size: Adjust = .{ .sign = .pos, .lhs = .bit_size, .op = .mul, .rhs = .@"2" };
             const add_bit_size: Adjust = .{ .sign = .pos, .lhs = .bit_size, .op = .mul, .rhs = .@"1" };
-            const sub_bit_size_rem_64: Adjust = .{ .sign = .pos, .lhs = .bit_size, .op = .rem_8_mul, .rhs = .@"8" };
+            const add_bit_size_rem_64: Adjust = .{ .sign = .pos, .lhs = .bit_size, .op = .rem_8_mul, .rhs = .@"8" }; // NOTE(review): named `_rem_64` but built from `.rem_8_mul` with rhs 8 — presumably bit_size % (8 * 8); confirm
+            const sub_bit_size_rem_64: Adjust = .{ .sign = .neg, .lhs = .bit_size, .op = .rem_8_mul, .rhs = .@"8" }; // same adjustment with `.sign = .neg`, matching the `sub_` prefix
             const sub_bit_size: Adjust = .{ .sign = .neg, .lhs = .bit_size, .op = .mul, .rhs = .@"1" };
             const add_src0_bit_size: Adjust = .{ .sign = .pos, .lhs = .src0_bit_size, .op = .mul, .rhs = .@"1" };
             const sub_src0_bit_size: Adjust = .{ .sign = .neg, .lhs = .src0_bit_size, .op = .mul, .rhs = .@"1" };
@@ -51163,6 +56076,9 @@ const Select = struct {
         fn sia(imm: i32, base: Ref.Sized, adjust: Adjust) Select.Operand {
             return .{ .flags = .{ .tag = .simm, .adjust = adjust }, .base = base, .imm = imm };
         }
+        fn sia2(imm: i32, base: Ref.Sized, index: Ref, adjust: Adjust) Select.Operand { // signed-immediate operand like `sia`, with a second operand reference
+            return .{ .flags = .{ .tag = .simm, .adjust = adjust }, .base = base, .index = .{ .ref = index }, .imm = imm }; // `index` is carried in `.index.ref`; everything else matches `sia`
+        }
         fn ui(imm: u32) Select.Operand {
             return .{ .flags = .{ .tag = .uimm }, .imm = @bitCast(imm) };
         }
src/Sema.zig
@@ -23751,23 +23751,27 @@ fn zirTruncate(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Ai
                 @tagName(dest_info.signedness), operand_ty.fmt(pt),
             });
         }
-        if (operand_info.bits < dest_info.bits) {
-            const msg = msg: {
-                const msg = try sema.errMsg(
-                    src,
-                    "destination type '{}' has more bits than source type '{}'",
-                    .{ dest_ty.fmt(pt), operand_ty.fmt(pt) },
-                );
-                errdefer msg.destroy(sema.gpa);
-                try sema.errNote(src, msg, "destination type has {d} bits", .{
-                    dest_info.bits,
-                });
-                try sema.errNote(operand_src, msg, "operand type has {d} bits", .{
-                    operand_info.bits,
-                });
-                break :msg msg;
-            };
-            return sema.failWithOwnedErrorMsg(block, msg);
+        switch (std.math.order(dest_info.bits, operand_info.bits)) { // compare destination width against operand width
+            .gt => { // destination is wider than the operand: report a compile error with both bit counts
+                const msg = msg: {
+                    const msg = try sema.errMsg(
+                        src,
+                        "destination type '{}' has more bits than source type '{}'",
+                        .{ dest_ty.fmt(pt), operand_ty.fmt(pt) },
+                    );
+                    errdefer msg.destroy(sema.gpa); // release the message if attaching a note fails
+                    try sema.errNote(src, msg, "destination type has {d} bits", .{
+                        dest_info.bits,
+                    });
+                    try sema.errNote(operand_src, msg, "operand type has {d} bits", .{
+                        operand_info.bits,
+                    });
+                    break :msg msg;
+                };
+                return sema.failWithOwnedErrorMsg(block, msg);
+            },
+            .eq => return operand, // equal widths: hand the operand back unchanged
+            .lt => {}, // destination is narrower: nothing to reject here
         }
     }
 
test/behavior/x86_64/math.zig
@@ -5223,719 +5223,2063 @@ fn cast(comptime op: anytype, comptime opts: struct { strict: bool = false }) ty
             try testArgs(f128, f128, nan(f128));
         }
         fn testSameSignednessIntVectors() !void {
+            try testArgs(@Vector(1, i7), @Vector(1, i1), .{-1});
             try testArgs(@Vector(1, i8), @Vector(1, i1), .{-1});
+            try testArgs(@Vector(1, i9), @Vector(1, i1), .{-1});
+            try testArgs(@Vector(1, i15), @Vector(1, i1), .{-1});
             try testArgs(@Vector(1, i16), @Vector(1, i1), .{-1});
+            try testArgs(@Vector(1, i17), @Vector(1, i1), .{-1});
+            try testArgs(@Vector(1, i31), @Vector(1, i1), .{-1});
             try testArgs(@Vector(1, i32), @Vector(1, i1), .{-1});
+            try testArgs(@Vector(1, i33), @Vector(1, i1), .{-1});
+            try testArgs(@Vector(1, i63), @Vector(1, i1), .{-1});
             try testArgs(@Vector(1, i64), @Vector(1, i1), .{-1});
+            try testArgs(@Vector(1, i65), @Vector(1, i1), .{-1});
+            try testArgs(@Vector(1, i127), @Vector(1, i1), .{-1});
             try testArgs(@Vector(1, i128), @Vector(1, i1), .{-1});
+            try testArgs(@Vector(1, i129), @Vector(1, i1), .{-1});
+            try testArgs(@Vector(1, i255), @Vector(1, i1), .{-1});
             try testArgs(@Vector(1, i256), @Vector(1, i1), .{-1});
+            try testArgs(@Vector(1, i257), @Vector(1, i1), .{-1});
+            try testArgs(@Vector(1, i511), @Vector(1, i1), .{-1});
             try testArgs(@Vector(1, i512), @Vector(1, i1), .{-1});
+            try testArgs(@Vector(1, i513), @Vector(1, i1), .{-1});
+            try testArgs(@Vector(1, i1023), @Vector(1, i1), .{-1});
             try testArgs(@Vector(1, i1024), @Vector(1, i1), .{-1});
+            try testArgs(@Vector(1, i1025), @Vector(1, i1), .{-1});
+            try testArgs(@Vector(1, u7), @Vector(1, u1), .{1});
             try testArgs(@Vector(1, u8), @Vector(1, u1), .{1});
+            try testArgs(@Vector(1, u9), @Vector(1, u1), .{1});
+            try testArgs(@Vector(1, u15), @Vector(1, u1), .{1});
             try testArgs(@Vector(1, u16), @Vector(1, u1), .{1});
+            try testArgs(@Vector(1, u17), @Vector(1, u1), .{1});
+            try testArgs(@Vector(1, u31), @Vector(1, u1), .{1});
             try testArgs(@Vector(1, u32), @Vector(1, u1), .{1});
+            try testArgs(@Vector(1, u33), @Vector(1, u1), .{1});
+            try testArgs(@Vector(1, u63), @Vector(1, u1), .{1});
             try testArgs(@Vector(1, u64), @Vector(1, u1), .{1});
+            try testArgs(@Vector(1, u65), @Vector(1, u1), .{1});
+            try testArgs(@Vector(1, u127), @Vector(1, u1), .{1});
             try testArgs(@Vector(1, u128), @Vector(1, u1), .{1});
+            try testArgs(@Vector(1, u129), @Vector(1, u1), .{1});
+            try testArgs(@Vector(1, u255), @Vector(1, u1), .{1});
             try testArgs(@Vector(1, u256), @Vector(1, u1), .{1});
+            try testArgs(@Vector(1, u257), @Vector(1, u1), .{1});
+            try testArgs(@Vector(1, u511), @Vector(1, u1), .{1});
             try testArgs(@Vector(1, u512), @Vector(1, u1), .{1});
+            try testArgs(@Vector(1, u513), @Vector(1, u1), .{1});
+            try testArgs(@Vector(1, u1023), @Vector(1, u1), .{1});
             try testArgs(@Vector(1, u1024), @Vector(1, u1), .{1});
+            try testArgs(@Vector(1, u1025), @Vector(1, u1), .{1});
 
+            try testArgs(@Vector(2, i7), @Vector(2, i1), .{ -1, 0 });
             try testArgs(@Vector(2, i8), @Vector(2, i1), .{ -1, 0 });
+            try testArgs(@Vector(2, i9), @Vector(2, i1), .{ -1, 0 });
+            try testArgs(@Vector(2, i15), @Vector(2, i1), .{ -1, 0 });
             try testArgs(@Vector(2, i16), @Vector(2, i1), .{ -1, 0 });
+            try testArgs(@Vector(2, i17), @Vector(2, i1), .{ -1, 0 });
+            try testArgs(@Vector(2, i31), @Vector(2, i1), .{ -1, 0 });
             try testArgs(@Vector(2, i32), @Vector(2, i1), .{ -1, 0 });
+            try testArgs(@Vector(2, i33), @Vector(2, i1), .{ -1, 0 });
+            try testArgs(@Vector(2, i63), @Vector(2, i1), .{ -1, 0 });
             try testArgs(@Vector(2, i64), @Vector(2, i1), .{ -1, 0 });
+            try testArgs(@Vector(2, i65), @Vector(2, i1), .{ -1, 0 });
+            try testArgs(@Vector(2, i127), @Vector(2, i1), .{ -1, 0 });
             try testArgs(@Vector(2, i128), @Vector(2, i1), .{ -1, 0 });
+            try testArgs(@Vector(2, i129), @Vector(2, i1), .{ -1, 0 });
+            try testArgs(@Vector(2, i255), @Vector(2, i1), .{ -1, 0 });
             try testArgs(@Vector(2, i256), @Vector(2, i1), .{ -1, 0 });
+            try testArgs(@Vector(2, i257), @Vector(2, i1), .{ -1, 0 });
+            try testArgs(@Vector(2, i511), @Vector(2, i1), .{ -1, 0 });
             try testArgs(@Vector(2, i512), @Vector(2, i1), .{ -1, 0 });
+            try testArgs(@Vector(2, i513), @Vector(2, i1), .{ -1, 0 });
+            try testArgs(@Vector(2, i1023), @Vector(2, i1), .{ -1, 0 });
             try testArgs(@Vector(2, i1024), @Vector(2, i1), .{ -1, 0 });
+            try testArgs(@Vector(2, i1025), @Vector(2, i1), .{ -1, 0 });
+            try testArgs(@Vector(2, u7), @Vector(2, u1), .{ 0, 1 });
             try testArgs(@Vector(2, u8), @Vector(2, u1), .{ 0, 1 });
+            try testArgs(@Vector(2, u9), @Vector(2, u1), .{ 0, 1 });
+            try testArgs(@Vector(2, u15), @Vector(2, u1), .{ 0, 1 });
             try testArgs(@Vector(2, u16), @Vector(2, u1), .{ 0, 1 });
+            try testArgs(@Vector(2, u17), @Vector(2, u1), .{ 0, 1 });
+            try testArgs(@Vector(2, u31), @Vector(2, u1), .{ 0, 1 });
             try testArgs(@Vector(2, u32), @Vector(2, u1), .{ 0, 1 });
+            try testArgs(@Vector(2, u33), @Vector(2, u1), .{ 0, 1 });
+            try testArgs(@Vector(2, u63), @Vector(2, u1), .{ 0, 1 });
             try testArgs(@Vector(2, u64), @Vector(2, u1), .{ 0, 1 });
+            try testArgs(@Vector(2, u65), @Vector(2, u1), .{ 0, 1 });
+            try testArgs(@Vector(2, u127), @Vector(2, u1), .{ 0, 1 });
             try testArgs(@Vector(2, u128), @Vector(2, u1), .{ 0, 1 });
+            try testArgs(@Vector(2, u129), @Vector(2, u1), .{ 0, 1 });
+            try testArgs(@Vector(2, u255), @Vector(2, u1), .{ 0, 1 });
             try testArgs(@Vector(2, u256), @Vector(2, u1), .{ 0, 1 });
+            try testArgs(@Vector(2, u257), @Vector(2, u1), .{ 0, 1 });
+            try testArgs(@Vector(2, u511), @Vector(2, u1), .{ 0, 1 });
             try testArgs(@Vector(2, u512), @Vector(2, u1), .{ 0, 1 });
+            try testArgs(@Vector(2, u513), @Vector(2, u1), .{ 0, 1 });
+            try testArgs(@Vector(2, u1023), @Vector(2, u1), .{ 0, 1 });
             try testArgs(@Vector(2, u1024), @Vector(2, u1), .{ 0, 1 });
+            try testArgs(@Vector(2, u1025), @Vector(2, u1), .{ 0, 1 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i2), .{ -1 << 1, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i2), .{ -1 << 1, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i2), .{ -1 << 1, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i2), .{ -1 << 1, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i2), .{ -1 << 1, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i2), .{ -1 << 1, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i2), .{ -1 << 1, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i2), .{ -1 << 1, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i2), .{ -1 << 1, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i2), .{ -1 << 1, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i2), .{ -1 << 1, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i2), .{ -1 << 1, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i2), .{ -1 << 1, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i2), .{ -1 << 1, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i2), .{ -1 << 1, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i2), .{ -1 << 1, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i2), .{ -1 << 1, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i2), .{ -1 << 1, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i2), .{ -1 << 1, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i2), .{ -1 << 1, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i2), .{ -1 << 1, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i2), .{ -1 << 1, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i2), .{ -1 << 1, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i2), .{ -1 << 1, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u2), .{ 0, 1, 1 << 1 });
             try testArgs(@Vector(3, u8), @Vector(3, u2), .{ 0, 1, 1 << 1 });
+            try testArgs(@Vector(3, u9), @Vector(3, u2), .{ 0, 1, 1 << 1 });
+            try testArgs(@Vector(3, u15), @Vector(3, u2), .{ 0, 1, 1 << 1 });
             try testArgs(@Vector(3, u16), @Vector(3, u2), .{ 0, 1, 1 << 1 });
+            try testArgs(@Vector(3, u17), @Vector(3, u2), .{ 0, 1, 1 << 1 });
+            try testArgs(@Vector(3, u31), @Vector(3, u2), .{ 0, 1, 1 << 1 });
             try testArgs(@Vector(3, u32), @Vector(3, u2), .{ 0, 1, 1 << 1 });
+            try testArgs(@Vector(3, u33), @Vector(3, u2), .{ 0, 1, 1 << 1 });
+            try testArgs(@Vector(3, u63), @Vector(3, u2), .{ 0, 1, 1 << 1 });
             try testArgs(@Vector(3, u64), @Vector(3, u2), .{ 0, 1, 1 << 1 });
+            try testArgs(@Vector(3, u65), @Vector(3, u2), .{ 0, 1, 1 << 1 });
+            try testArgs(@Vector(3, u127), @Vector(3, u2), .{ 0, 1, 1 << 1 });
             try testArgs(@Vector(3, u128), @Vector(3, u2), .{ 0, 1, 1 << 1 });
+            try testArgs(@Vector(3, u129), @Vector(3, u2), .{ 0, 1, 1 << 1 });
+            try testArgs(@Vector(3, u255), @Vector(3, u2), .{ 0, 1, 1 << 1 });
             try testArgs(@Vector(3, u256), @Vector(3, u2), .{ 0, 1, 1 << 1 });
+            try testArgs(@Vector(3, u257), @Vector(3, u2), .{ 0, 1, 1 << 1 });
+            try testArgs(@Vector(3, u511), @Vector(3, u2), .{ 0, 1, 1 << 1 });
             try testArgs(@Vector(3, u512), @Vector(3, u2), .{ 0, 1, 1 << 1 });
+            try testArgs(@Vector(3, u513), @Vector(3, u2), .{ 0, 1, 1 << 1 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u2), .{ 0, 1, 1 << 1 });
             try testArgs(@Vector(3, u1024), @Vector(3, u2), .{ 0, 1, 1 << 1 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u2), .{ 0, 1, 1 << 1 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i3), .{ -1 << 2, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i3), .{ -1 << 2, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i3), .{ -1 << 2, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i3), .{ -1 << 2, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i3), .{ -1 << 2, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i3), .{ -1 << 2, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i3), .{ -1 << 2, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i3), .{ -1 << 2, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i3), .{ -1 << 2, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i3), .{ -1 << 2, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i3), .{ -1 << 2, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i3), .{ -1 << 2, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i3), .{ -1 << 2, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i3), .{ -1 << 2, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i3), .{ -1 << 2, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i3), .{ -1 << 2, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i3), .{ -1 << 2, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i3), .{ -1 << 2, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i3), .{ -1 << 2, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i3), .{ -1 << 2, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i3), .{ -1 << 2, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i3), .{ -1 << 2, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i3), .{ -1 << 2, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i3), .{ -1 << 2, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u3), .{ 0, 1, 1 << 2 });
             try testArgs(@Vector(3, u8), @Vector(3, u3), .{ 0, 1, 1 << 2 });
+            try testArgs(@Vector(3, u9), @Vector(3, u3), .{ 0, 1, 1 << 2 });
+            try testArgs(@Vector(3, u15), @Vector(3, u3), .{ 0, 1, 1 << 2 });
             try testArgs(@Vector(3, u16), @Vector(3, u3), .{ 0, 1, 1 << 2 });
+            try testArgs(@Vector(3, u17), @Vector(3, u3), .{ 0, 1, 1 << 2 });
+            try testArgs(@Vector(3, u31), @Vector(3, u3), .{ 0, 1, 1 << 2 });
             try testArgs(@Vector(3, u32), @Vector(3, u3), .{ 0, 1, 1 << 2 });
+            try testArgs(@Vector(3, u33), @Vector(3, u3), .{ 0, 1, 1 << 2 });
+            try testArgs(@Vector(3, u63), @Vector(3, u3), .{ 0, 1, 1 << 2 });
             try testArgs(@Vector(3, u64), @Vector(3, u3), .{ 0, 1, 1 << 2 });
+            try testArgs(@Vector(3, u65), @Vector(3, u3), .{ 0, 1, 1 << 2 });
+            try testArgs(@Vector(3, u127), @Vector(3, u3), .{ 0, 1, 1 << 2 });
             try testArgs(@Vector(3, u128), @Vector(3, u3), .{ 0, 1, 1 << 2 });
+            try testArgs(@Vector(3, u129), @Vector(3, u3), .{ 0, 1, 1 << 2 });
+            try testArgs(@Vector(3, u255), @Vector(3, u3), .{ 0, 1, 1 << 2 });
             try testArgs(@Vector(3, u256), @Vector(3, u3), .{ 0, 1, 1 << 2 });
+            try testArgs(@Vector(3, u257), @Vector(3, u3), .{ 0, 1, 1 << 2 });
+            try testArgs(@Vector(3, u511), @Vector(3, u3), .{ 0, 1, 1 << 2 });
             try testArgs(@Vector(3, u512), @Vector(3, u3), .{ 0, 1, 1 << 2 });
+            try testArgs(@Vector(3, u513), @Vector(3, u3), .{ 0, 1, 1 << 2 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u3), .{ 0, 1, 1 << 2 });
             try testArgs(@Vector(3, u1024), @Vector(3, u3), .{ 0, 1, 1 << 2 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u3), .{ 0, 1, 1 << 2 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i4), .{ -1 << 3, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i4), .{ -1 << 3, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i4), .{ -1 << 3, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i4), .{ -1 << 3, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i4), .{ -1 << 3, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i4), .{ -1 << 3, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i4), .{ -1 << 3, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i4), .{ -1 << 3, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i4), .{ -1 << 3, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i4), .{ -1 << 3, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i4), .{ -1 << 3, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i4), .{ -1 << 3, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i4), .{ -1 << 3, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i4), .{ -1 << 3, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i4), .{ -1 << 3, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i4), .{ -1 << 3, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i4), .{ -1 << 3, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i4), .{ -1 << 3, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i4), .{ -1 << 3, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i4), .{ -1 << 3, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i4), .{ -1 << 3, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i4), .{ -1 << 3, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i4), .{ -1 << 3, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i4), .{ -1 << 3, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u4), .{ 0, 1, 1 << 3 });
             try testArgs(@Vector(3, u8), @Vector(3, u4), .{ 0, 1, 1 << 3 });
+            try testArgs(@Vector(3, u9), @Vector(3, u4), .{ 0, 1, 1 << 3 });
+            try testArgs(@Vector(3, u15), @Vector(3, u4), .{ 0, 1, 1 << 3 });
             try testArgs(@Vector(3, u16), @Vector(3, u4), .{ 0, 1, 1 << 3 });
+            try testArgs(@Vector(3, u17), @Vector(3, u4), .{ 0, 1, 1 << 3 });
+            try testArgs(@Vector(3, u31), @Vector(3, u4), .{ 0, 1, 1 << 3 });
             try testArgs(@Vector(3, u32), @Vector(3, u4), .{ 0, 1, 1 << 3 });
+            try testArgs(@Vector(3, u33), @Vector(3, u4), .{ 0, 1, 1 << 3 });
+            try testArgs(@Vector(3, u63), @Vector(3, u4), .{ 0, 1, 1 << 3 });
             try testArgs(@Vector(3, u64), @Vector(3, u4), .{ 0, 1, 1 << 3 });
+            try testArgs(@Vector(3, u65), @Vector(3, u4), .{ 0, 1, 1 << 3 });
+            try testArgs(@Vector(3, u127), @Vector(3, u4), .{ 0, 1, 1 << 3 });
             try testArgs(@Vector(3, u128), @Vector(3, u4), .{ 0, 1, 1 << 3 });
+            try testArgs(@Vector(3, u129), @Vector(3, u4), .{ 0, 1, 1 << 3 });
+            try testArgs(@Vector(3, u255), @Vector(3, u4), .{ 0, 1, 1 << 3 });
             try testArgs(@Vector(3, u256), @Vector(3, u4), .{ 0, 1, 1 << 3 });
+            try testArgs(@Vector(3, u257), @Vector(3, u4), .{ 0, 1, 1 << 3 });
+            try testArgs(@Vector(3, u511), @Vector(3, u4), .{ 0, 1, 1 << 3 });
             try testArgs(@Vector(3, u512), @Vector(3, u4), .{ 0, 1, 1 << 3 });
+            try testArgs(@Vector(3, u513), @Vector(3, u4), .{ 0, 1, 1 << 3 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u4), .{ 0, 1, 1 << 3 });
             try testArgs(@Vector(3, u1024), @Vector(3, u4), .{ 0, 1, 1 << 3 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u4), .{ 0, 1, 1 << 3 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i5), .{ -1 << 4, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i5), .{ -1 << 4, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i5), .{ -1 << 4, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i5), .{ -1 << 4, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i5), .{ -1 << 4, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i5), .{ -1 << 4, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i5), .{ -1 << 4, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i5), .{ -1 << 4, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i5), .{ -1 << 4, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i5), .{ -1 << 4, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i5), .{ -1 << 4, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i5), .{ -1 << 4, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i5), .{ -1 << 4, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i5), .{ -1 << 4, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i5), .{ -1 << 4, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i5), .{ -1 << 4, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i5), .{ -1 << 4, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i5), .{ -1 << 4, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i5), .{ -1 << 4, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i5), .{ -1 << 4, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i5), .{ -1 << 4, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i5), .{ -1 << 4, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i5), .{ -1 << 4, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i5), .{ -1 << 4, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u5), .{ 0, 1, 1 << 4 });
             try testArgs(@Vector(3, u8), @Vector(3, u5), .{ 0, 1, 1 << 4 });
+            try testArgs(@Vector(3, u9), @Vector(3, u5), .{ 0, 1, 1 << 4 });
+            try testArgs(@Vector(3, u15), @Vector(3, u5), .{ 0, 1, 1 << 4 });
             try testArgs(@Vector(3, u16), @Vector(3, u5), .{ 0, 1, 1 << 4 });
+            try testArgs(@Vector(3, u17), @Vector(3, u5), .{ 0, 1, 1 << 4 });
+            try testArgs(@Vector(3, u31), @Vector(3, u5), .{ 0, 1, 1 << 4 });
             try testArgs(@Vector(3, u32), @Vector(3, u5), .{ 0, 1, 1 << 4 });
+            try testArgs(@Vector(3, u33), @Vector(3, u5), .{ 0, 1, 1 << 4 });
+            try testArgs(@Vector(3, u63), @Vector(3, u5), .{ 0, 1, 1 << 4 });
             try testArgs(@Vector(3, u64), @Vector(3, u5), .{ 0, 1, 1 << 4 });
+            try testArgs(@Vector(3, u65), @Vector(3, u5), .{ 0, 1, 1 << 4 });
+            try testArgs(@Vector(3, u127), @Vector(3, u5), .{ 0, 1, 1 << 4 });
             try testArgs(@Vector(3, u128), @Vector(3, u5), .{ 0, 1, 1 << 4 });
+            try testArgs(@Vector(3, u129), @Vector(3, u5), .{ 0, 1, 1 << 4 });
+            try testArgs(@Vector(3, u255), @Vector(3, u5), .{ 0, 1, 1 << 4 });
             try testArgs(@Vector(3, u256), @Vector(3, u5), .{ 0, 1, 1 << 4 });
+            try testArgs(@Vector(3, u257), @Vector(3, u5), .{ 0, 1, 1 << 4 });
+            try testArgs(@Vector(3, u511), @Vector(3, u5), .{ 0, 1, 1 << 4 });
             try testArgs(@Vector(3, u512), @Vector(3, u5), .{ 0, 1, 1 << 4 });
+            try testArgs(@Vector(3, u513), @Vector(3, u5), .{ 0, 1, 1 << 4 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u5), .{ 0, 1, 1 << 4 });
             try testArgs(@Vector(3, u1024), @Vector(3, u5), .{ 0, 1, 1 << 4 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u5), .{ 0, 1, 1 << 4 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i7), .{ -1 << 6, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i7), .{ -1 << 6, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i7), .{ -1 << 6, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i7), .{ -1 << 6, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i7), .{ -1 << 6, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i7), .{ -1 << 6, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i7), .{ -1 << 6, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i7), .{ -1 << 6, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i7), .{ -1 << 6, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i7), .{ -1 << 6, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i7), .{ -1 << 6, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i7), .{ -1 << 6, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i7), .{ -1 << 6, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i7), .{ -1 << 6, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i7), .{ -1 << 6, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i7), .{ -1 << 6, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i7), .{ -1 << 6, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i7), .{ -1 << 6, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i7), .{ -1 << 6, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i7), .{ -1 << 6, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i7), .{ -1 << 6, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i7), .{ -1 << 6, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i7), .{ -1 << 6, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i7), .{ -1 << 6, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u7), .{ 0, 1, 1 << 6 });
             try testArgs(@Vector(3, u8), @Vector(3, u7), .{ 0, 1, 1 << 6 });
+            try testArgs(@Vector(3, u9), @Vector(3, u7), .{ 0, 1, 1 << 6 });
+            try testArgs(@Vector(3, u15), @Vector(3, u7), .{ 0, 1, 1 << 6 });
             try testArgs(@Vector(3, u16), @Vector(3, u7), .{ 0, 1, 1 << 6 });
+            try testArgs(@Vector(3, u17), @Vector(3, u7), .{ 0, 1, 1 << 6 });
+            try testArgs(@Vector(3, u31), @Vector(3, u7), .{ 0, 1, 1 << 6 });
             try testArgs(@Vector(3, u32), @Vector(3, u7), .{ 0, 1, 1 << 6 });
+            try testArgs(@Vector(3, u33), @Vector(3, u7), .{ 0, 1, 1 << 6 });
+            try testArgs(@Vector(3, u63), @Vector(3, u7), .{ 0, 1, 1 << 6 });
             try testArgs(@Vector(3, u64), @Vector(3, u7), .{ 0, 1, 1 << 6 });
+            try testArgs(@Vector(3, u65), @Vector(3, u7), .{ 0, 1, 1 << 6 });
+            try testArgs(@Vector(3, u127), @Vector(3, u7), .{ 0, 1, 1 << 6 });
             try testArgs(@Vector(3, u128), @Vector(3, u7), .{ 0, 1, 1 << 6 });
+            try testArgs(@Vector(3, u129), @Vector(3, u7), .{ 0, 1, 1 << 6 });
+            try testArgs(@Vector(3, u255), @Vector(3, u7), .{ 0, 1, 1 << 6 });
             try testArgs(@Vector(3, u256), @Vector(3, u7), .{ 0, 1, 1 << 6 });
+            try testArgs(@Vector(3, u257), @Vector(3, u7), .{ 0, 1, 1 << 6 });
+            try testArgs(@Vector(3, u511), @Vector(3, u7), .{ 0, 1, 1 << 6 });
             try testArgs(@Vector(3, u512), @Vector(3, u7), .{ 0, 1, 1 << 6 });
+            try testArgs(@Vector(3, u513), @Vector(3, u7), .{ 0, 1, 1 << 6 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u7), .{ 0, 1, 1 << 6 });
             try testArgs(@Vector(3, u1024), @Vector(3, u7), .{ 0, 1, 1 << 6 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u7), .{ 0, 1, 1 << 6 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i8), .{ -1 << 7, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i8), .{ -1 << 7, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i8), .{ -1 << 7, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i8), .{ -1 << 7, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i8), .{ -1 << 7, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i8), .{ -1 << 7, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i8), .{ -1 << 7, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i8), .{ -1 << 7, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i8), .{ -1 << 7, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i8), .{ -1 << 7, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i8), .{ -1 << 7, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i8), .{ -1 << 7, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i8), .{ -1 << 7, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i8), .{ -1 << 7, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i8), .{ -1 << 7, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i8), .{ -1 << 7, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i8), .{ -1 << 7, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i8), .{ -1 << 7, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i8), .{ -1 << 7, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i8), .{ -1 << 7, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i8), .{ -1 << 7, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i8), .{ -1 << 7, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i8), .{ -1 << 7, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i8), .{ -1 << 7, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u8), .{ 0, 1, 1 << 7 });
             try testArgs(@Vector(3, u8), @Vector(3, u8), .{ 0, 1, 1 << 7 });
+            try testArgs(@Vector(3, u9), @Vector(3, u8), .{ 0, 1, 1 << 7 });
+            try testArgs(@Vector(3, u15), @Vector(3, u8), .{ 0, 1, 1 << 7 });
             try testArgs(@Vector(3, u16), @Vector(3, u8), .{ 0, 1, 1 << 7 });
+            try testArgs(@Vector(3, u17), @Vector(3, u8), .{ 0, 1, 1 << 7 });
+            try testArgs(@Vector(3, u31), @Vector(3, u8), .{ 0, 1, 1 << 7 });
             try testArgs(@Vector(3, u32), @Vector(3, u8), .{ 0, 1, 1 << 7 });
+            try testArgs(@Vector(3, u33), @Vector(3, u8), .{ 0, 1, 1 << 7 });
+            try testArgs(@Vector(3, u63), @Vector(3, u8), .{ 0, 1, 1 << 7 });
             try testArgs(@Vector(3, u64), @Vector(3, u8), .{ 0, 1, 1 << 7 });
+            try testArgs(@Vector(3, u65), @Vector(3, u8), .{ 0, 1, 1 << 7 });
+            try testArgs(@Vector(3, u127), @Vector(3, u8), .{ 0, 1, 1 << 7 });
             try testArgs(@Vector(3, u128), @Vector(3, u8), .{ 0, 1, 1 << 7 });
+            try testArgs(@Vector(3, u129), @Vector(3, u8), .{ 0, 1, 1 << 7 });
+            try testArgs(@Vector(3, u255), @Vector(3, u8), .{ 0, 1, 1 << 7 });
             try testArgs(@Vector(3, u256), @Vector(3, u8), .{ 0, 1, 1 << 7 });
+            try testArgs(@Vector(3, u257), @Vector(3, u8), .{ 0, 1, 1 << 7 });
+            try testArgs(@Vector(3, u511), @Vector(3, u8), .{ 0, 1, 1 << 7 });
             try testArgs(@Vector(3, u512), @Vector(3, u8), .{ 0, 1, 1 << 7 });
+            try testArgs(@Vector(3, u513), @Vector(3, u8), .{ 0, 1, 1 << 7 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u8), .{ 0, 1, 1 << 7 });
             try testArgs(@Vector(3, u1024), @Vector(3, u8), .{ 0, 1, 1 << 7 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u8), .{ 0, 1, 1 << 7 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i9), .{ -1 << 8, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i9), .{ -1 << 8, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i9), .{ -1 << 8, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i9), .{ -1 << 8, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i9), .{ -1 << 8, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i9), .{ -1 << 8, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i9), .{ -1 << 8, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i9), .{ -1 << 8, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i9), .{ -1 << 8, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i9), .{ -1 << 8, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i9), .{ -1 << 8, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i9), .{ -1 << 8, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i9), .{ -1 << 8, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i9), .{ -1 << 8, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i9), .{ -1 << 8, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i9), .{ -1 << 8, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i9), .{ -1 << 8, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i9), .{ -1 << 8, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i9), .{ -1 << 8, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i9), .{ -1 << 8, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i9), .{ -1 << 8, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i9), .{ -1 << 8, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i9), .{ -1 << 8, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i9), .{ -1 << 8, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u9), .{ 0, 1, 1 << 8 });
             try testArgs(@Vector(3, u8), @Vector(3, u9), .{ 0, 1, 1 << 8 });
+            try testArgs(@Vector(3, u9), @Vector(3, u9), .{ 0, 1, 1 << 8 });
+            try testArgs(@Vector(3, u15), @Vector(3, u9), .{ 0, 1, 1 << 8 });
             try testArgs(@Vector(3, u16), @Vector(3, u9), .{ 0, 1, 1 << 8 });
+            try testArgs(@Vector(3, u17), @Vector(3, u9), .{ 0, 1, 1 << 8 });
+            try testArgs(@Vector(3, u31), @Vector(3, u9), .{ 0, 1, 1 << 8 });
             try testArgs(@Vector(3, u32), @Vector(3, u9), .{ 0, 1, 1 << 8 });
+            try testArgs(@Vector(3, u33), @Vector(3, u9), .{ 0, 1, 1 << 8 });
+            try testArgs(@Vector(3, u63), @Vector(3, u9), .{ 0, 1, 1 << 8 });
             try testArgs(@Vector(3, u64), @Vector(3, u9), .{ 0, 1, 1 << 8 });
+            try testArgs(@Vector(3, u65), @Vector(3, u9), .{ 0, 1, 1 << 8 });
+            try testArgs(@Vector(3, u127), @Vector(3, u9), .{ 0, 1, 1 << 8 });
             try testArgs(@Vector(3, u128), @Vector(3, u9), .{ 0, 1, 1 << 8 });
+            try testArgs(@Vector(3, u129), @Vector(3, u9), .{ 0, 1, 1 << 8 });
+            try testArgs(@Vector(3, u255), @Vector(3, u9), .{ 0, 1, 1 << 8 });
             try testArgs(@Vector(3, u256), @Vector(3, u9), .{ 0, 1, 1 << 8 });
+            try testArgs(@Vector(3, u257), @Vector(3, u9), .{ 0, 1, 1 << 8 });
+            try testArgs(@Vector(3, u511), @Vector(3, u9), .{ 0, 1, 1 << 8 });
             try testArgs(@Vector(3, u512), @Vector(3, u9), .{ 0, 1, 1 << 8 });
+            try testArgs(@Vector(3, u513), @Vector(3, u9), .{ 0, 1, 1 << 8 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u9), .{ 0, 1, 1 << 8 });
             try testArgs(@Vector(3, u1024), @Vector(3, u9), .{ 0, 1, 1 << 8 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u9), .{ 0, 1, 1 << 8 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i15), .{ -1 << 14, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i15), .{ -1 << 14, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i15), .{ -1 << 14, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i15), .{ -1 << 14, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i15), .{ -1 << 14, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i15), .{ -1 << 14, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i15), .{ -1 << 14, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i15), .{ -1 << 14, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i15), .{ -1 << 14, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i15), .{ -1 << 14, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i15), .{ -1 << 14, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i15), .{ -1 << 14, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i15), .{ -1 << 14, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i15), .{ -1 << 14, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i15), .{ -1 << 14, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i15), .{ -1 << 14, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i15), .{ -1 << 14, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i15), .{ -1 << 14, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i15), .{ -1 << 14, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i15), .{ -1 << 14, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i15), .{ -1 << 14, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i15), .{ -1 << 14, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i15), .{ -1 << 14, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i15), .{ -1 << 14, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u15), .{ 0, 1, 1 << 14 });
             try testArgs(@Vector(3, u8), @Vector(3, u15), .{ 0, 1, 1 << 14 });
+            try testArgs(@Vector(3, u9), @Vector(3, u15), .{ 0, 1, 1 << 14 });
+            try testArgs(@Vector(3, u15), @Vector(3, u15), .{ 0, 1, 1 << 14 });
             try testArgs(@Vector(3, u16), @Vector(3, u15), .{ 0, 1, 1 << 14 });
+            try testArgs(@Vector(3, u17), @Vector(3, u15), .{ 0, 1, 1 << 14 });
+            try testArgs(@Vector(3, u31), @Vector(3, u15), .{ 0, 1, 1 << 14 });
             try testArgs(@Vector(3, u32), @Vector(3, u15), .{ 0, 1, 1 << 14 });
+            try testArgs(@Vector(3, u33), @Vector(3, u15), .{ 0, 1, 1 << 14 });
+            try testArgs(@Vector(3, u63), @Vector(3, u15), .{ 0, 1, 1 << 14 });
             try testArgs(@Vector(3, u64), @Vector(3, u15), .{ 0, 1, 1 << 14 });
+            try testArgs(@Vector(3, u65), @Vector(3, u15), .{ 0, 1, 1 << 14 });
+            try testArgs(@Vector(3, u127), @Vector(3, u15), .{ 0, 1, 1 << 14 });
             try testArgs(@Vector(3, u128), @Vector(3, u15), .{ 0, 1, 1 << 14 });
+            try testArgs(@Vector(3, u129), @Vector(3, u15), .{ 0, 1, 1 << 14 });
+            try testArgs(@Vector(3, u255), @Vector(3, u15), .{ 0, 1, 1 << 14 });
             try testArgs(@Vector(3, u256), @Vector(3, u15), .{ 0, 1, 1 << 14 });
+            try testArgs(@Vector(3, u257), @Vector(3, u15), .{ 0, 1, 1 << 14 });
+            try testArgs(@Vector(3, u511), @Vector(3, u15), .{ 0, 1, 1 << 14 });
             try testArgs(@Vector(3, u512), @Vector(3, u15), .{ 0, 1, 1 << 14 });
+            try testArgs(@Vector(3, u513), @Vector(3, u15), .{ 0, 1, 1 << 14 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u15), .{ 0, 1, 1 << 14 });
             try testArgs(@Vector(3, u1024), @Vector(3, u15), .{ 0, 1, 1 << 14 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u15), .{ 0, 1, 1 << 14 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i16), .{ -1 << 15, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i16), .{ -1 << 15, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i16), .{ -1 << 15, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i16), .{ -1 << 15, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i16), .{ -1 << 15, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i16), .{ -1 << 15, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i16), .{ -1 << 15, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i16), .{ -1 << 15, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i16), .{ -1 << 15, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i16), .{ -1 << 15, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i16), .{ -1 << 15, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i16), .{ -1 << 15, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i16), .{ -1 << 15, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i16), .{ -1 << 15, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i16), .{ -1 << 15, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i16), .{ -1 << 15, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i16), .{ -1 << 15, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i16), .{ -1 << 15, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i16), .{ -1 << 15, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i16), .{ -1 << 15, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i16), .{ -1 << 15, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i16), .{ -1 << 15, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i16), .{ -1 << 15, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i16), .{ -1 << 15, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u16), .{ 0, 1, 1 << 15 });
             try testArgs(@Vector(3, u8), @Vector(3, u16), .{ 0, 1, 1 << 15 });
+            try testArgs(@Vector(3, u9), @Vector(3, u16), .{ 0, 1, 1 << 15 });
+            try testArgs(@Vector(3, u15), @Vector(3, u16), .{ 0, 1, 1 << 15 });
             try testArgs(@Vector(3, u16), @Vector(3, u16), .{ 0, 1, 1 << 15 });
+            try testArgs(@Vector(3, u17), @Vector(3, u16), .{ 0, 1, 1 << 15 });
+            try testArgs(@Vector(3, u31), @Vector(3, u16), .{ 0, 1, 1 << 15 });
             try testArgs(@Vector(3, u32), @Vector(3, u16), .{ 0, 1, 1 << 15 });
+            try testArgs(@Vector(3, u33), @Vector(3, u16), .{ 0, 1, 1 << 15 });
+            try testArgs(@Vector(3, u63), @Vector(3, u16), .{ 0, 1, 1 << 15 });
             try testArgs(@Vector(3, u64), @Vector(3, u16), .{ 0, 1, 1 << 15 });
+            try testArgs(@Vector(3, u65), @Vector(3, u16), .{ 0, 1, 1 << 15 });
+            try testArgs(@Vector(3, u127), @Vector(3, u16), .{ 0, 1, 1 << 15 });
             try testArgs(@Vector(3, u128), @Vector(3, u16), .{ 0, 1, 1 << 15 });
+            try testArgs(@Vector(3, u129), @Vector(3, u16), .{ 0, 1, 1 << 15 });
+            try testArgs(@Vector(3, u255), @Vector(3, u16), .{ 0, 1, 1 << 15 });
             try testArgs(@Vector(3, u256), @Vector(3, u16), .{ 0, 1, 1 << 15 });
+            try testArgs(@Vector(3, u257), @Vector(3, u16), .{ 0, 1, 1 << 15 });
+            try testArgs(@Vector(3, u511), @Vector(3, u16), .{ 0, 1, 1 << 15 });
             try testArgs(@Vector(3, u512), @Vector(3, u16), .{ 0, 1, 1 << 15 });
+            try testArgs(@Vector(3, u513), @Vector(3, u16), .{ 0, 1, 1 << 15 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u16), .{ 0, 1, 1 << 15 });
             try testArgs(@Vector(3, u1024), @Vector(3, u16), .{ 0, 1, 1 << 15 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u16), .{ 0, 1, 1 << 15 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i17), .{ -1 << 16, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i17), .{ -1 << 16, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i17), .{ -1 << 16, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i17), .{ -1 << 16, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i17), .{ -1 << 16, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i17), .{ -1 << 16, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i17), .{ -1 << 16, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i17), .{ -1 << 16, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i17), .{ -1 << 16, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i17), .{ -1 << 16, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i17), .{ -1 << 16, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i17), .{ -1 << 16, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i17), .{ -1 << 16, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i17), .{ -1 << 16, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i17), .{ -1 << 16, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i17), .{ -1 << 16, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i17), .{ -1 << 16, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i17), .{ -1 << 16, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i17), .{ -1 << 16, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i17), .{ -1 << 16, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i17), .{ -1 << 16, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i17), .{ -1 << 16, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i17), .{ -1 << 16, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i17), .{ -1 << 16, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u17), .{ 0, 1, 1 << 16 });
             try testArgs(@Vector(3, u8), @Vector(3, u17), .{ 0, 1, 1 << 16 });
+            try testArgs(@Vector(3, u9), @Vector(3, u17), .{ 0, 1, 1 << 16 });
+            try testArgs(@Vector(3, u15), @Vector(3, u17), .{ 0, 1, 1 << 16 });
             try testArgs(@Vector(3, u16), @Vector(3, u17), .{ 0, 1, 1 << 16 });
+            try testArgs(@Vector(3, u17), @Vector(3, u17), .{ 0, 1, 1 << 16 });
+            try testArgs(@Vector(3, u31), @Vector(3, u17), .{ 0, 1, 1 << 16 });
             try testArgs(@Vector(3, u32), @Vector(3, u17), .{ 0, 1, 1 << 16 });
+            try testArgs(@Vector(3, u33), @Vector(3, u17), .{ 0, 1, 1 << 16 });
+            try testArgs(@Vector(3, u63), @Vector(3, u17), .{ 0, 1, 1 << 16 });
             try testArgs(@Vector(3, u64), @Vector(3, u17), .{ 0, 1, 1 << 16 });
+            try testArgs(@Vector(3, u65), @Vector(3, u17), .{ 0, 1, 1 << 16 });
+            try testArgs(@Vector(3, u127), @Vector(3, u17), .{ 0, 1, 1 << 16 });
             try testArgs(@Vector(3, u128), @Vector(3, u17), .{ 0, 1, 1 << 16 });
+            try testArgs(@Vector(3, u129), @Vector(3, u17), .{ 0, 1, 1 << 16 });
+            try testArgs(@Vector(3, u255), @Vector(3, u17), .{ 0, 1, 1 << 16 });
             try testArgs(@Vector(3, u256), @Vector(3, u17), .{ 0, 1, 1 << 16 });
+            try testArgs(@Vector(3, u257), @Vector(3, u17), .{ 0, 1, 1 << 16 });
+            try testArgs(@Vector(3, u511), @Vector(3, u17), .{ 0, 1, 1 << 16 });
             try testArgs(@Vector(3, u512), @Vector(3, u17), .{ 0, 1, 1 << 16 });
+            try testArgs(@Vector(3, u513), @Vector(3, u17), .{ 0, 1, 1 << 16 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u17), .{ 0, 1, 1 << 16 });
             try testArgs(@Vector(3, u1024), @Vector(3, u17), .{ 0, 1, 1 << 16 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u17), .{ 0, 1, 1 << 16 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i31), .{ -1 << 30, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i31), .{ -1 << 30, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i31), .{ -1 << 30, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i31), .{ -1 << 30, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i31), .{ -1 << 30, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i31), .{ -1 << 30, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i31), .{ -1 << 30, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i31), .{ -1 << 30, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i31), .{ -1 << 30, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i31), .{ -1 << 30, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i31), .{ -1 << 30, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i31), .{ -1 << 30, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i31), .{ -1 << 30, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i31), .{ -1 << 30, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i31), .{ -1 << 30, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i31), .{ -1 << 30, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i31), .{ -1 << 30, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i31), .{ -1 << 30, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i31), .{ -1 << 30, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i31), .{ -1 << 30, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i31), .{ -1 << 30, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i31), .{ -1 << 30, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i31), .{ -1 << 30, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i31), .{ -1 << 30, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u31), .{ 0, 1, 1 << 30 });
             try testArgs(@Vector(3, u8), @Vector(3, u31), .{ 0, 1, 1 << 30 });
+            try testArgs(@Vector(3, u9), @Vector(3, u31), .{ 0, 1, 1 << 30 });
+            try testArgs(@Vector(3, u15), @Vector(3, u31), .{ 0, 1, 1 << 30 });
             try testArgs(@Vector(3, u16), @Vector(3, u31), .{ 0, 1, 1 << 30 });
+            try testArgs(@Vector(3, u17), @Vector(3, u31), .{ 0, 1, 1 << 30 });
+            try testArgs(@Vector(3, u31), @Vector(3, u31), .{ 0, 1, 1 << 30 });
             try testArgs(@Vector(3, u32), @Vector(3, u31), .{ 0, 1, 1 << 30 });
+            try testArgs(@Vector(3, u33), @Vector(3, u31), .{ 0, 1, 1 << 30 });
+            try testArgs(@Vector(3, u63), @Vector(3, u31), .{ 0, 1, 1 << 30 });
             try testArgs(@Vector(3, u64), @Vector(3, u31), .{ 0, 1, 1 << 30 });
+            try testArgs(@Vector(3, u65), @Vector(3, u31), .{ 0, 1, 1 << 30 });
+            try testArgs(@Vector(3, u127), @Vector(3, u31), .{ 0, 1, 1 << 30 });
             try testArgs(@Vector(3, u128), @Vector(3, u31), .{ 0, 1, 1 << 30 });
+            try testArgs(@Vector(3, u129), @Vector(3, u31), .{ 0, 1, 1 << 30 });
+            try testArgs(@Vector(3, u255), @Vector(3, u31), .{ 0, 1, 1 << 30 });
             try testArgs(@Vector(3, u256), @Vector(3, u31), .{ 0, 1, 1 << 30 });
+            try testArgs(@Vector(3, u257), @Vector(3, u31), .{ 0, 1, 1 << 30 });
+            try testArgs(@Vector(3, u511), @Vector(3, u31), .{ 0, 1, 1 << 30 });
             try testArgs(@Vector(3, u512), @Vector(3, u31), .{ 0, 1, 1 << 30 });
+            try testArgs(@Vector(3, u513), @Vector(3, u31), .{ 0, 1, 1 << 30 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u31), .{ 0, 1, 1 << 30 });
             try testArgs(@Vector(3, u1024), @Vector(3, u31), .{ 0, 1, 1 << 30 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u31), .{ 0, 1, 1 << 30 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i32), .{ -1 << 31, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i32), .{ -1 << 31, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i32), .{ -1 << 31, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i32), .{ -1 << 31, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i32), .{ -1 << 31, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i32), .{ -1 << 31, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i32), .{ -1 << 31, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i32), .{ -1 << 31, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i32), .{ -1 << 31, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i32), .{ -1 << 31, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i32), .{ -1 << 31, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i32), .{ -1 << 31, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i32), .{ -1 << 31, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i32), .{ -1 << 31, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i32), .{ -1 << 31, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i32), .{ -1 << 31, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i32), .{ -1 << 31, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i32), .{ -1 << 31, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i32), .{ -1 << 31, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i32), .{ -1 << 31, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i32), .{ -1 << 31, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i32), .{ -1 << 31, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i32), .{ -1 << 31, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i32), .{ -1 << 31, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u32), .{ 0, 1, 1 << 31 });
             try testArgs(@Vector(3, u8), @Vector(3, u32), .{ 0, 1, 1 << 31 });
+            try testArgs(@Vector(3, u9), @Vector(3, u32), .{ 0, 1, 1 << 31 });
+            try testArgs(@Vector(3, u15), @Vector(3, u32), .{ 0, 1, 1 << 31 });
             try testArgs(@Vector(3, u16), @Vector(3, u32), .{ 0, 1, 1 << 31 });
+            try testArgs(@Vector(3, u17), @Vector(3, u32), .{ 0, 1, 1 << 31 });
+            try testArgs(@Vector(3, u31), @Vector(3, u32), .{ 0, 1, 1 << 31 });
             try testArgs(@Vector(3, u32), @Vector(3, u32), .{ 0, 1, 1 << 31 });
+            try testArgs(@Vector(3, u33), @Vector(3, u32), .{ 0, 1, 1 << 31 });
+            try testArgs(@Vector(3, u63), @Vector(3, u32), .{ 0, 1, 1 << 31 });
             try testArgs(@Vector(3, u64), @Vector(3, u32), .{ 0, 1, 1 << 31 });
+            try testArgs(@Vector(3, u65), @Vector(3, u32), .{ 0, 1, 1 << 31 });
+            try testArgs(@Vector(3, u127), @Vector(3, u32), .{ 0, 1, 1 << 31 });
             try testArgs(@Vector(3, u128), @Vector(3, u32), .{ 0, 1, 1 << 31 });
+            try testArgs(@Vector(3, u129), @Vector(3, u32), .{ 0, 1, 1 << 31 });
+            try testArgs(@Vector(3, u255), @Vector(3, u32), .{ 0, 1, 1 << 31 });
             try testArgs(@Vector(3, u256), @Vector(3, u32), .{ 0, 1, 1 << 31 });
+            try testArgs(@Vector(3, u257), @Vector(3, u32), .{ 0, 1, 1 << 31 });
+            try testArgs(@Vector(3, u511), @Vector(3, u32), .{ 0, 1, 1 << 31 });
             try testArgs(@Vector(3, u512), @Vector(3, u32), .{ 0, 1, 1 << 31 });
+            try testArgs(@Vector(3, u513), @Vector(3, u32), .{ 0, 1, 1 << 31 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u32), .{ 0, 1, 1 << 31 });
             try testArgs(@Vector(3, u1024), @Vector(3, u32), .{ 0, 1, 1 << 31 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u32), .{ 0, 1, 1 << 31 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i33), .{ -1 << 32, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i33), .{ -1 << 32, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i33), .{ -1 << 32, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i33), .{ -1 << 32, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i33), .{ -1 << 32, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i33), .{ -1 << 32, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i33), .{ -1 << 32, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i33), .{ -1 << 32, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i33), .{ -1 << 32, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i33), .{ -1 << 32, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i33), .{ -1 << 32, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i33), .{ -1 << 32, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i33), .{ -1 << 32, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i33), .{ -1 << 32, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i33), .{ -1 << 32, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i33), .{ -1 << 32, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i33), .{ -1 << 32, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i33), .{ -1 << 32, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i33), .{ -1 << 32, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i33), .{ -1 << 32, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i33), .{ -1 << 32, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i33), .{ -1 << 32, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i33), .{ -1 << 32, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i33), .{ -1 << 32, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u33), .{ 0, 1, 1 << 32 });
             try testArgs(@Vector(3, u8), @Vector(3, u33), .{ 0, 1, 1 << 32 });
+            try testArgs(@Vector(3, u9), @Vector(3, u33), .{ 0, 1, 1 << 32 });
+            try testArgs(@Vector(3, u15), @Vector(3, u33), .{ 0, 1, 1 << 32 });
             try testArgs(@Vector(3, u16), @Vector(3, u33), .{ 0, 1, 1 << 32 });
+            try testArgs(@Vector(3, u17), @Vector(3, u33), .{ 0, 1, 1 << 32 });
+            try testArgs(@Vector(3, u31), @Vector(3, u33), .{ 0, 1, 1 << 32 });
             try testArgs(@Vector(3, u32), @Vector(3, u33), .{ 0, 1, 1 << 32 });
+            try testArgs(@Vector(3, u33), @Vector(3, u33), .{ 0, 1, 1 << 32 });
+            try testArgs(@Vector(3, u63), @Vector(3, u33), .{ 0, 1, 1 << 32 });
             try testArgs(@Vector(3, u64), @Vector(3, u33), .{ 0, 1, 1 << 32 });
+            try testArgs(@Vector(3, u65), @Vector(3, u33), .{ 0, 1, 1 << 32 });
+            try testArgs(@Vector(3, u127), @Vector(3, u33), .{ 0, 1, 1 << 32 });
             try testArgs(@Vector(3, u128), @Vector(3, u33), .{ 0, 1, 1 << 32 });
+            try testArgs(@Vector(3, u129), @Vector(3, u33), .{ 0, 1, 1 << 32 });
+            try testArgs(@Vector(3, u255), @Vector(3, u33), .{ 0, 1, 1 << 32 });
             try testArgs(@Vector(3, u256), @Vector(3, u33), .{ 0, 1, 1 << 32 });
+            try testArgs(@Vector(3, u257), @Vector(3, u33), .{ 0, 1, 1 << 32 });
+            try testArgs(@Vector(3, u511), @Vector(3, u33), .{ 0, 1, 1 << 32 });
             try testArgs(@Vector(3, u512), @Vector(3, u33), .{ 0, 1, 1 << 32 });
+            try testArgs(@Vector(3, u513), @Vector(3, u33), .{ 0, 1, 1 << 32 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u33), .{ 0, 1, 1 << 32 });
             try testArgs(@Vector(3, u1024), @Vector(3, u33), .{ 0, 1, 1 << 32 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u33), .{ 0, 1, 1 << 32 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i63), .{ -1 << 62, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i63), .{ -1 << 62, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i63), .{ -1 << 62, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i63), .{ -1 << 62, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i63), .{ -1 << 62, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i63), .{ -1 << 62, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i63), .{ -1 << 62, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i63), .{ -1 << 62, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i63), .{ -1 << 62, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i63), .{ -1 << 62, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i63), .{ -1 << 62, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i63), .{ -1 << 62, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i63), .{ -1 << 62, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i63), .{ -1 << 62, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i63), .{ -1 << 62, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i63), .{ -1 << 62, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i63), .{ -1 << 62, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i63), .{ -1 << 62, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i63), .{ -1 << 62, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i63), .{ -1 << 62, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i63), .{ -1 << 62, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i63), .{ -1 << 62, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i63), .{ -1 << 62, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i63), .{ -1 << 62, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u63), .{ 0, 1, 1 << 62 });
             try testArgs(@Vector(3, u8), @Vector(3, u63), .{ 0, 1, 1 << 62 });
+            try testArgs(@Vector(3, u9), @Vector(3, u63), .{ 0, 1, 1 << 62 });
+            try testArgs(@Vector(3, u15), @Vector(3, u63), .{ 0, 1, 1 << 62 });
             try testArgs(@Vector(3, u16), @Vector(3, u63), .{ 0, 1, 1 << 62 });
+            try testArgs(@Vector(3, u17), @Vector(3, u63), .{ 0, 1, 1 << 62 });
+            try testArgs(@Vector(3, u31), @Vector(3, u63), .{ 0, 1, 1 << 62 });
             try testArgs(@Vector(3, u32), @Vector(3, u63), .{ 0, 1, 1 << 62 });
+            try testArgs(@Vector(3, u33), @Vector(3, u63), .{ 0, 1, 1 << 62 });
+            try testArgs(@Vector(3, u63), @Vector(3, u63), .{ 0, 1, 1 << 62 });
             try testArgs(@Vector(3, u64), @Vector(3, u63), .{ 0, 1, 1 << 62 });
+            try testArgs(@Vector(3, u65), @Vector(3, u63), .{ 0, 1, 1 << 62 });
+            try testArgs(@Vector(3, u127), @Vector(3, u63), .{ 0, 1, 1 << 62 });
             try testArgs(@Vector(3, u128), @Vector(3, u63), .{ 0, 1, 1 << 62 });
+            try testArgs(@Vector(3, u129), @Vector(3, u63), .{ 0, 1, 1 << 62 });
+            try testArgs(@Vector(3, u255), @Vector(3, u63), .{ 0, 1, 1 << 62 });
             try testArgs(@Vector(3, u256), @Vector(3, u63), .{ 0, 1, 1 << 62 });
+            try testArgs(@Vector(3, u257), @Vector(3, u63), .{ 0, 1, 1 << 62 });
+            try testArgs(@Vector(3, u511), @Vector(3, u63), .{ 0, 1, 1 << 62 });
             try testArgs(@Vector(3, u512), @Vector(3, u63), .{ 0, 1, 1 << 62 });
+            try testArgs(@Vector(3, u513), @Vector(3, u63), .{ 0, 1, 1 << 62 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u63), .{ 0, 1, 1 << 62 });
             try testArgs(@Vector(3, u1024), @Vector(3, u63), .{ 0, 1, 1 << 62 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u63), .{ 0, 1, 1 << 62 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i64), .{ -1 << 63, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i64), .{ -1 << 63, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i64), .{ -1 << 63, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i64), .{ -1 << 63, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i64), .{ -1 << 63, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i64), .{ -1 << 63, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i64), .{ -1 << 63, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i64), .{ -1 << 63, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i64), .{ -1 << 63, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i64), .{ -1 << 63, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i64), .{ -1 << 63, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i64), .{ -1 << 63, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i64), .{ -1 << 63, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i64), .{ -1 << 63, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i64), .{ -1 << 63, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i64), .{ -1 << 63, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i64), .{ -1 << 63, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i64), .{ -1 << 63, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i64), .{ -1 << 63, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i64), .{ -1 << 63, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i64), .{ -1 << 63, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i64), .{ -1 << 63, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i64), .{ -1 << 63, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i64), .{ -1 << 63, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u64), .{ 0, 1, 1 << 63 });
             try testArgs(@Vector(3, u8), @Vector(3, u64), .{ 0, 1, 1 << 63 });
+            try testArgs(@Vector(3, u9), @Vector(3, u64), .{ 0, 1, 1 << 63 });
+            try testArgs(@Vector(3, u15), @Vector(3, u64), .{ 0, 1, 1 << 63 });
             try testArgs(@Vector(3, u16), @Vector(3, u64), .{ 0, 1, 1 << 63 });
+            try testArgs(@Vector(3, u17), @Vector(3, u64), .{ 0, 1, 1 << 63 });
+            try testArgs(@Vector(3, u31), @Vector(3, u64), .{ 0, 1, 1 << 63 });
             try testArgs(@Vector(3, u32), @Vector(3, u64), .{ 0, 1, 1 << 63 });
+            try testArgs(@Vector(3, u33), @Vector(3, u64), .{ 0, 1, 1 << 63 });
+            try testArgs(@Vector(3, u63), @Vector(3, u64), .{ 0, 1, 1 << 63 });
             try testArgs(@Vector(3, u64), @Vector(3, u64), .{ 0, 1, 1 << 63 });
+            try testArgs(@Vector(3, u65), @Vector(3, u64), .{ 0, 1, 1 << 63 });
+            try testArgs(@Vector(3, u127), @Vector(3, u64), .{ 0, 1, 1 << 63 });
             try testArgs(@Vector(3, u128), @Vector(3, u64), .{ 0, 1, 1 << 63 });
+            try testArgs(@Vector(3, u129), @Vector(3, u64), .{ 0, 1, 1 << 63 });
+            try testArgs(@Vector(3, u255), @Vector(3, u64), .{ 0, 1, 1 << 63 });
             try testArgs(@Vector(3, u256), @Vector(3, u64), .{ 0, 1, 1 << 63 });
+            try testArgs(@Vector(3, u257), @Vector(3, u64), .{ 0, 1, 1 << 63 });
+            try testArgs(@Vector(3, u511), @Vector(3, u64), .{ 0, 1, 1 << 63 });
             try testArgs(@Vector(3, u512), @Vector(3, u64), .{ 0, 1, 1 << 63 });
+            try testArgs(@Vector(3, u513), @Vector(3, u64), .{ 0, 1, 1 << 63 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u64), .{ 0, 1, 1 << 63 });
             try testArgs(@Vector(3, u1024), @Vector(3, u64), .{ 0, 1, 1 << 63 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u64), .{ 0, 1, 1 << 63 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i65), .{ -1 << 64, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i65), .{ -1 << 64, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i65), .{ -1 << 64, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i65), .{ -1 << 64, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i65), .{ -1 << 64, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i65), .{ -1 << 64, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i65), .{ -1 << 64, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i65), .{ -1 << 64, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i65), .{ -1 << 64, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i65), .{ -1 << 64, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i65), .{ -1 << 64, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i65), .{ -1 << 64, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i65), .{ -1 << 64, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i65), .{ -1 << 64, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i65), .{ -1 << 64, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i65), .{ -1 << 64, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i65), .{ -1 << 64, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i65), .{ -1 << 64, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i65), .{ -1 << 64, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i65), .{ -1 << 64, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i65), .{ -1 << 64, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i65), .{ -1 << 64, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i65), .{ -1 << 64, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i65), .{ -1 << 64, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u65), .{ 0, 1, 1 << 64 });
             try testArgs(@Vector(3, u8), @Vector(3, u65), .{ 0, 1, 1 << 64 });
+            try testArgs(@Vector(3, u9), @Vector(3, u65), .{ 0, 1, 1 << 64 });
+            try testArgs(@Vector(3, u15), @Vector(3, u65), .{ 0, 1, 1 << 64 });
             try testArgs(@Vector(3, u16), @Vector(3, u65), .{ 0, 1, 1 << 64 });
+            try testArgs(@Vector(3, u17), @Vector(3, u65), .{ 0, 1, 1 << 64 });
+            try testArgs(@Vector(3, u31), @Vector(3, u65), .{ 0, 1, 1 << 64 });
             try testArgs(@Vector(3, u32), @Vector(3, u65), .{ 0, 1, 1 << 64 });
+            try testArgs(@Vector(3, u33), @Vector(3, u65), .{ 0, 1, 1 << 64 });
+            try testArgs(@Vector(3, u63), @Vector(3, u65), .{ 0, 1, 1 << 64 });
             try testArgs(@Vector(3, u64), @Vector(3, u65), .{ 0, 1, 1 << 64 });
+            try testArgs(@Vector(3, u65), @Vector(3, u65), .{ 0, 1, 1 << 64 });
+            try testArgs(@Vector(3, u127), @Vector(3, u65), .{ 0, 1, 1 << 64 });
             try testArgs(@Vector(3, u128), @Vector(3, u65), .{ 0, 1, 1 << 64 });
+            try testArgs(@Vector(3, u129), @Vector(3, u65), .{ 0, 1, 1 << 64 });
+            try testArgs(@Vector(3, u255), @Vector(3, u65), .{ 0, 1, 1 << 64 });
             try testArgs(@Vector(3, u256), @Vector(3, u65), .{ 0, 1, 1 << 64 });
+            try testArgs(@Vector(3, u257), @Vector(3, u65), .{ 0, 1, 1 << 64 });
+            try testArgs(@Vector(3, u511), @Vector(3, u65), .{ 0, 1, 1 << 64 });
             try testArgs(@Vector(3, u512), @Vector(3, u65), .{ 0, 1, 1 << 64 });
+            try testArgs(@Vector(3, u513), @Vector(3, u65), .{ 0, 1, 1 << 64 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u65), .{ 0, 1, 1 << 64 });
             try testArgs(@Vector(3, u1024), @Vector(3, u65), .{ 0, 1, 1 << 64 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u65), .{ 0, 1, 1 << 64 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i95), .{ -1 << 94, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i95), .{ -1 << 94, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i95), .{ -1 << 94, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i95), .{ -1 << 94, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i95), .{ -1 << 94, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i95), .{ -1 << 94, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i95), .{ -1 << 94, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i95), .{ -1 << 94, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i95), .{ -1 << 94, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i95), .{ -1 << 94, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i95), .{ -1 << 94, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i95), .{ -1 << 94, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i95), .{ -1 << 94, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i95), .{ -1 << 94, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i95), .{ -1 << 94, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i95), .{ -1 << 94, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i95), .{ -1 << 94, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i95), .{ -1 << 94, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i95), .{ -1 << 94, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i95), .{ -1 << 94, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i95), .{ -1 << 94, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i95), .{ -1 << 94, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i95), .{ -1 << 94, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i95), .{ -1 << 94, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u95), .{ 0, 1, 1 << 94 });
             try testArgs(@Vector(3, u8), @Vector(3, u95), .{ 0, 1, 1 << 94 });
+            try testArgs(@Vector(3, u9), @Vector(3, u95), .{ 0, 1, 1 << 94 });
+            try testArgs(@Vector(3, u15), @Vector(3, u95), .{ 0, 1, 1 << 94 });
             try testArgs(@Vector(3, u16), @Vector(3, u95), .{ 0, 1, 1 << 94 });
+            try testArgs(@Vector(3, u17), @Vector(3, u95), .{ 0, 1, 1 << 94 });
+            try testArgs(@Vector(3, u31), @Vector(3, u95), .{ 0, 1, 1 << 94 });
             try testArgs(@Vector(3, u32), @Vector(3, u95), .{ 0, 1, 1 << 94 });
+            try testArgs(@Vector(3, u33), @Vector(3, u95), .{ 0, 1, 1 << 94 });
+            try testArgs(@Vector(3, u63), @Vector(3, u95), .{ 0, 1, 1 << 94 });
             try testArgs(@Vector(3, u64), @Vector(3, u95), .{ 0, 1, 1 << 94 });
+            try testArgs(@Vector(3, u65), @Vector(3, u95), .{ 0, 1, 1 << 94 });
+            try testArgs(@Vector(3, u127), @Vector(3, u95), .{ 0, 1, 1 << 94 });
             try testArgs(@Vector(3, u128), @Vector(3, u95), .{ 0, 1, 1 << 94 });
+            try testArgs(@Vector(3, u129), @Vector(3, u95), .{ 0, 1, 1 << 94 });
+            try testArgs(@Vector(3, u255), @Vector(3, u95), .{ 0, 1, 1 << 94 });
             try testArgs(@Vector(3, u256), @Vector(3, u95), .{ 0, 1, 1 << 94 });
+            try testArgs(@Vector(3, u257), @Vector(3, u95), .{ 0, 1, 1 << 94 });
+            try testArgs(@Vector(3, u511), @Vector(3, u95), .{ 0, 1, 1 << 94 });
             try testArgs(@Vector(3, u512), @Vector(3, u95), .{ 0, 1, 1 << 94 });
+            try testArgs(@Vector(3, u513), @Vector(3, u95), .{ 0, 1, 1 << 94 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u95), .{ 0, 1, 1 << 94 });
             try testArgs(@Vector(3, u1024), @Vector(3, u95), .{ 0, 1, 1 << 94 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u95), .{ 0, 1, 1 << 94 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i96), .{ -1 << 95, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i96), .{ -1 << 95, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i96), .{ -1 << 95, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i96), .{ -1 << 95, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i96), .{ -1 << 95, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i96), .{ -1 << 95, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i96), .{ -1 << 95, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i96), .{ -1 << 95, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i96), .{ -1 << 95, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i96), .{ -1 << 95, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i96), .{ -1 << 95, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i96), .{ -1 << 95, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i96), .{ -1 << 95, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i96), .{ -1 << 95, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i96), .{ -1 << 95, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i96), .{ -1 << 95, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i96), .{ -1 << 95, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i96), .{ -1 << 95, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i96), .{ -1 << 95, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i96), .{ -1 << 95, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i96), .{ -1 << 95, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i96), .{ -1 << 95, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i96), .{ -1 << 95, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i96), .{ -1 << 95, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u96), .{ 0, 1, 1 << 95 });
             try testArgs(@Vector(3, u8), @Vector(3, u96), .{ 0, 1, 1 << 95 });
+            try testArgs(@Vector(3, u9), @Vector(3, u96), .{ 0, 1, 1 << 95 });
+            try testArgs(@Vector(3, u15), @Vector(3, u96), .{ 0, 1, 1 << 95 });
             try testArgs(@Vector(3, u16), @Vector(3, u96), .{ 0, 1, 1 << 95 });
+            try testArgs(@Vector(3, u17), @Vector(3, u96), .{ 0, 1, 1 << 95 });
+            try testArgs(@Vector(3, u31), @Vector(3, u96), .{ 0, 1, 1 << 95 });
             try testArgs(@Vector(3, u32), @Vector(3, u96), .{ 0, 1, 1 << 95 });
+            try testArgs(@Vector(3, u33), @Vector(3, u96), .{ 0, 1, 1 << 95 });
+            try testArgs(@Vector(3, u63), @Vector(3, u96), .{ 0, 1, 1 << 95 });
             try testArgs(@Vector(3, u64), @Vector(3, u96), .{ 0, 1, 1 << 95 });
+            try testArgs(@Vector(3, u65), @Vector(3, u96), .{ 0, 1, 1 << 95 });
+            try testArgs(@Vector(3, u127), @Vector(3, u96), .{ 0, 1, 1 << 95 });
             try testArgs(@Vector(3, u128), @Vector(3, u96), .{ 0, 1, 1 << 95 });
+            try testArgs(@Vector(3, u129), @Vector(3, u96), .{ 0, 1, 1 << 95 });
+            try testArgs(@Vector(3, u255), @Vector(3, u96), .{ 0, 1, 1 << 95 });
             try testArgs(@Vector(3, u256), @Vector(3, u96), .{ 0, 1, 1 << 95 });
+            try testArgs(@Vector(3, u257), @Vector(3, u96), .{ 0, 1, 1 << 95 });
+            try testArgs(@Vector(3, u511), @Vector(3, u96), .{ 0, 1, 1 << 95 });
             try testArgs(@Vector(3, u512), @Vector(3, u96), .{ 0, 1, 1 << 95 });
+            try testArgs(@Vector(3, u513), @Vector(3, u96), .{ 0, 1, 1 << 95 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u96), .{ 0, 1, 1 << 95 });
             try testArgs(@Vector(3, u1024), @Vector(3, u96), .{ 0, 1, 1 << 95 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u96), .{ 0, 1, 1 << 95 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i97), .{ -1 << 96, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i97), .{ -1 << 96, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i97), .{ -1 << 96, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i97), .{ -1 << 96, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i97), .{ -1 << 96, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i97), .{ -1 << 96, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i97), .{ -1 << 96, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i97), .{ -1 << 96, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i97), .{ -1 << 96, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i97), .{ -1 << 96, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i97), .{ -1 << 96, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i97), .{ -1 << 96, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i97), .{ -1 << 96, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i97), .{ -1 << 96, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i97), .{ -1 << 96, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i97), .{ -1 << 96, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i97), .{ -1 << 96, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i97), .{ -1 << 96, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i97), .{ -1 << 96, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i97), .{ -1 << 96, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i97), .{ -1 << 96, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i97), .{ -1 << 96, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i97), .{ -1 << 96, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i97), .{ -1 << 96, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u97), .{ 0, 1, 1 << 96 });
             try testArgs(@Vector(3, u8), @Vector(3, u97), .{ 0, 1, 1 << 96 });
+            try testArgs(@Vector(3, u9), @Vector(3, u97), .{ 0, 1, 1 << 96 });
+            try testArgs(@Vector(3, u15), @Vector(3, u97), .{ 0, 1, 1 << 96 });
             try testArgs(@Vector(3, u16), @Vector(3, u97), .{ 0, 1, 1 << 96 });
+            try testArgs(@Vector(3, u17), @Vector(3, u97), .{ 0, 1, 1 << 96 });
+            try testArgs(@Vector(3, u31), @Vector(3, u97), .{ 0, 1, 1 << 96 });
             try testArgs(@Vector(3, u32), @Vector(3, u97), .{ 0, 1, 1 << 96 });
+            try testArgs(@Vector(3, u33), @Vector(3, u97), .{ 0, 1, 1 << 96 });
+            try testArgs(@Vector(3, u63), @Vector(3, u97), .{ 0, 1, 1 << 96 });
             try testArgs(@Vector(3, u64), @Vector(3, u97), .{ 0, 1, 1 << 96 });
+            try testArgs(@Vector(3, u65), @Vector(3, u97), .{ 0, 1, 1 << 96 });
+            try testArgs(@Vector(3, u127), @Vector(3, u97), .{ 0, 1, 1 << 96 });
             try testArgs(@Vector(3, u128), @Vector(3, u97), .{ 0, 1, 1 << 96 });
+            try testArgs(@Vector(3, u129), @Vector(3, u97), .{ 0, 1, 1 << 96 });
+            try testArgs(@Vector(3, u255), @Vector(3, u97), .{ 0, 1, 1 << 96 });
             try testArgs(@Vector(3, u256), @Vector(3, u97), .{ 0, 1, 1 << 96 });
+            try testArgs(@Vector(3, u257), @Vector(3, u97), .{ 0, 1, 1 << 96 });
+            try testArgs(@Vector(3, u511), @Vector(3, u97), .{ 0, 1, 1 << 96 });
             try testArgs(@Vector(3, u512), @Vector(3, u97), .{ 0, 1, 1 << 96 });
+            try testArgs(@Vector(3, u513), @Vector(3, u97), .{ 0, 1, 1 << 96 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u97), .{ 0, 1, 1 << 96 });
             try testArgs(@Vector(3, u1024), @Vector(3, u97), .{ 0, 1, 1 << 96 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u97), .{ 0, 1, 1 << 96 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i127), .{ -1 << 126, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i127), .{ -1 << 126, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i127), .{ -1 << 126, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i127), .{ -1 << 126, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i127), .{ -1 << 126, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i127), .{ -1 << 126, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i127), .{ -1 << 126, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i127), .{ -1 << 126, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i127), .{ -1 << 126, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i127), .{ -1 << 126, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i127), .{ -1 << 126, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i127), .{ -1 << 126, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i127), .{ -1 << 126, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i127), .{ -1 << 126, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i127), .{ -1 << 126, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i127), .{ -1 << 126, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i127), .{ -1 << 126, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i127), .{ -1 << 126, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i127), .{ -1 << 126, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i127), .{ -1 << 126, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i127), .{ -1 << 126, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i127), .{ -1 << 126, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i127), .{ -1 << 126, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i127), .{ -1 << 126, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u127), .{ 0, 1, 1 << 126 });
             try testArgs(@Vector(3, u8), @Vector(3, u127), .{ 0, 1, 1 << 126 });
+            try testArgs(@Vector(3, u9), @Vector(3, u127), .{ 0, 1, 1 << 126 });
+            try testArgs(@Vector(3, u15), @Vector(3, u127), .{ 0, 1, 1 << 126 });
             try testArgs(@Vector(3, u16), @Vector(3, u127), .{ 0, 1, 1 << 126 });
+            try testArgs(@Vector(3, u17), @Vector(3, u127), .{ 0, 1, 1 << 126 });
+            try testArgs(@Vector(3, u31), @Vector(3, u127), .{ 0, 1, 1 << 126 });
             try testArgs(@Vector(3, u32), @Vector(3, u127), .{ 0, 1, 1 << 126 });
+            try testArgs(@Vector(3, u33), @Vector(3, u127), .{ 0, 1, 1 << 126 });
+            try testArgs(@Vector(3, u63), @Vector(3, u127), .{ 0, 1, 1 << 126 });
             try testArgs(@Vector(3, u64), @Vector(3, u127), .{ 0, 1, 1 << 126 });
+            try testArgs(@Vector(3, u65), @Vector(3, u127), .{ 0, 1, 1 << 126 });
+            try testArgs(@Vector(3, u127), @Vector(3, u127), .{ 0, 1, 1 << 126 });
             try testArgs(@Vector(3, u128), @Vector(3, u127), .{ 0, 1, 1 << 126 });
+            try testArgs(@Vector(3, u129), @Vector(3, u127), .{ 0, 1, 1 << 126 });
+            try testArgs(@Vector(3, u255), @Vector(3, u127), .{ 0, 1, 1 << 126 });
             try testArgs(@Vector(3, u256), @Vector(3, u127), .{ 0, 1, 1 << 126 });
+            try testArgs(@Vector(3, u257), @Vector(3, u127), .{ 0, 1, 1 << 126 });
+            try testArgs(@Vector(3, u511), @Vector(3, u127), .{ 0, 1, 1 << 126 });
             try testArgs(@Vector(3, u512), @Vector(3, u127), .{ 0, 1, 1 << 126 });
+            try testArgs(@Vector(3, u513), @Vector(3, u127), .{ 0, 1, 1 << 126 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u127), .{ 0, 1, 1 << 126 });
             try testArgs(@Vector(3, u1024), @Vector(3, u127), .{ 0, 1, 1 << 126 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u127), .{ 0, 1, 1 << 126 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i128), .{ -1 << 127, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i128), .{ -1 << 127, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i128), .{ -1 << 127, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i128), .{ -1 << 127, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i128), .{ -1 << 127, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i128), .{ -1 << 127, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i128), .{ -1 << 127, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i128), .{ -1 << 127, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i128), .{ -1 << 127, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i128), .{ -1 << 127, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i128), .{ -1 << 127, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i128), .{ -1 << 127, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i128), .{ -1 << 127, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i128), .{ -1 << 127, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i128), .{ -1 << 127, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i128), .{ -1 << 127, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i128), .{ -1 << 127, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i128), .{ -1 << 127, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i128), .{ -1 << 127, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i128), .{ -1 << 127, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i128), .{ -1 << 127, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i128), .{ -1 << 127, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i128), .{ -1 << 127, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i128), .{ -1 << 127, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u128), .{ 0, 1, 1 << 127 });
             try testArgs(@Vector(3, u8), @Vector(3, u128), .{ 0, 1, 1 << 127 });
+            try testArgs(@Vector(3, u9), @Vector(3, u128), .{ 0, 1, 1 << 127 });
+            try testArgs(@Vector(3, u15), @Vector(3, u128), .{ 0, 1, 1 << 127 });
             try testArgs(@Vector(3, u16), @Vector(3, u128), .{ 0, 1, 1 << 127 });
+            try testArgs(@Vector(3, u17), @Vector(3, u128), .{ 0, 1, 1 << 127 });
+            try testArgs(@Vector(3, u31), @Vector(3, u128), .{ 0, 1, 1 << 127 });
             try testArgs(@Vector(3, u32), @Vector(3, u128), .{ 0, 1, 1 << 127 });
+            try testArgs(@Vector(3, u33), @Vector(3, u128), .{ 0, 1, 1 << 127 });
+            try testArgs(@Vector(3, u63), @Vector(3, u128), .{ 0, 1, 1 << 127 });
             try testArgs(@Vector(3, u64), @Vector(3, u128), .{ 0, 1, 1 << 127 });
+            try testArgs(@Vector(3, u65), @Vector(3, u128), .{ 0, 1, 1 << 127 });
+            try testArgs(@Vector(3, u127), @Vector(3, u128), .{ 0, 1, 1 << 127 });
             try testArgs(@Vector(3, u128), @Vector(3, u128), .{ 0, 1, 1 << 127 });
+            try testArgs(@Vector(3, u129), @Vector(3, u128), .{ 0, 1, 1 << 127 });
+            try testArgs(@Vector(3, u255), @Vector(3, u128), .{ 0, 1, 1 << 127 });
             try testArgs(@Vector(3, u256), @Vector(3, u128), .{ 0, 1, 1 << 127 });
+            try testArgs(@Vector(3, u257), @Vector(3, u128), .{ 0, 1, 1 << 127 });
+            try testArgs(@Vector(3, u511), @Vector(3, u128), .{ 0, 1, 1 << 127 });
             try testArgs(@Vector(3, u512), @Vector(3, u128), .{ 0, 1, 1 << 127 });
+            try testArgs(@Vector(3, u513), @Vector(3, u128), .{ 0, 1, 1 << 127 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u128), .{ 0, 1, 1 << 127 });
             try testArgs(@Vector(3, u1024), @Vector(3, u128), .{ 0, 1, 1 << 127 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u128), .{ 0, 1, 1 << 127 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i129), .{ -1 << 128, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i129), .{ -1 << 128, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i129), .{ -1 << 128, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i129), .{ -1 << 128, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i129), .{ -1 << 128, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i129), .{ -1 << 128, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i129), .{ -1 << 128, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i129), .{ -1 << 128, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i129), .{ -1 << 128, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i129), .{ -1 << 128, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i129), .{ -1 << 128, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i129), .{ -1 << 128, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i129), .{ -1 << 128, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i129), .{ -1 << 128, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i129), .{ -1 << 128, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i129), .{ -1 << 128, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i129), .{ -1 << 128, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i129), .{ -1 << 128, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i129), .{ -1 << 128, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i129), .{ -1 << 128, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i129), .{ -1 << 128, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i129), .{ -1 << 128, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i129), .{ -1 << 128, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i129), .{ -1 << 128, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u129), .{ 0, 1, 1 << 128 });
             try testArgs(@Vector(3, u8), @Vector(3, u129), .{ 0, 1, 1 << 128 });
+            try testArgs(@Vector(3, u9), @Vector(3, u129), .{ 0, 1, 1 << 128 });
+            try testArgs(@Vector(3, u15), @Vector(3, u129), .{ 0, 1, 1 << 128 });
             try testArgs(@Vector(3, u16), @Vector(3, u129), .{ 0, 1, 1 << 128 });
+            try testArgs(@Vector(3, u17), @Vector(3, u129), .{ 0, 1, 1 << 128 });
+            try testArgs(@Vector(3, u31), @Vector(3, u129), .{ 0, 1, 1 << 128 });
             try testArgs(@Vector(3, u32), @Vector(3, u129), .{ 0, 1, 1 << 128 });
+            try testArgs(@Vector(3, u33), @Vector(3, u129), .{ 0, 1, 1 << 128 });
+            try testArgs(@Vector(3, u63), @Vector(3, u129), .{ 0, 1, 1 << 128 });
             try testArgs(@Vector(3, u64), @Vector(3, u129), .{ 0, 1, 1 << 128 });
+            try testArgs(@Vector(3, u65), @Vector(3, u129), .{ 0, 1, 1 << 128 });
+            try testArgs(@Vector(3, u127), @Vector(3, u129), .{ 0, 1, 1 << 128 });
             try testArgs(@Vector(3, u128), @Vector(3, u129), .{ 0, 1, 1 << 128 });
+            try testArgs(@Vector(3, u129), @Vector(3, u129), .{ 0, 1, 1 << 128 });
+            try testArgs(@Vector(3, u255), @Vector(3, u129), .{ 0, 1, 1 << 128 });
             try testArgs(@Vector(3, u256), @Vector(3, u129), .{ 0, 1, 1 << 128 });
+            try testArgs(@Vector(3, u257), @Vector(3, u129), .{ 0, 1, 1 << 128 });
+            try testArgs(@Vector(3, u511), @Vector(3, u129), .{ 0, 1, 1 << 128 });
             try testArgs(@Vector(3, u512), @Vector(3, u129), .{ 0, 1, 1 << 128 });
+            try testArgs(@Vector(3, u513), @Vector(3, u129), .{ 0, 1, 1 << 128 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u129), .{ 0, 1, 1 << 128 });
             try testArgs(@Vector(3, u1024), @Vector(3, u129), .{ 0, 1, 1 << 128 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u129), .{ 0, 1, 1 << 128 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i159), .{ -1 << 158, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i159), .{ -1 << 158, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i159), .{ -1 << 158, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i159), .{ -1 << 158, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i159), .{ -1 << 158, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i159), .{ -1 << 158, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i159), .{ -1 << 158, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i159), .{ -1 << 158, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i159), .{ -1 << 158, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i159), .{ -1 << 158, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i159), .{ -1 << 158, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i159), .{ -1 << 158, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i159), .{ -1 << 158, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i159), .{ -1 << 158, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i159), .{ -1 << 158, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i159), .{ -1 << 158, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i159), .{ -1 << 158, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i159), .{ -1 << 158, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i159), .{ -1 << 158, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i159), .{ -1 << 158, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i159), .{ -1 << 158, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i159), .{ -1 << 158, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i159), .{ -1 << 158, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i159), .{ -1 << 158, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u159), .{ 0, 1, 1 << 158 });
             try testArgs(@Vector(3, u8), @Vector(3, u159), .{ 0, 1, 1 << 158 });
+            try testArgs(@Vector(3, u9), @Vector(3, u159), .{ 0, 1, 1 << 158 });
+            try testArgs(@Vector(3, u15), @Vector(3, u159), .{ 0, 1, 1 << 158 });
             try testArgs(@Vector(3, u16), @Vector(3, u159), .{ 0, 1, 1 << 158 });
+            try testArgs(@Vector(3, u17), @Vector(3, u159), .{ 0, 1, 1 << 158 });
+            try testArgs(@Vector(3, u31), @Vector(3, u159), .{ 0, 1, 1 << 158 });
             try testArgs(@Vector(3, u32), @Vector(3, u159), .{ 0, 1, 1 << 158 });
+            try testArgs(@Vector(3, u33), @Vector(3, u159), .{ 0, 1, 1 << 158 });
+            try testArgs(@Vector(3, u63), @Vector(3, u159), .{ 0, 1, 1 << 158 });
             try testArgs(@Vector(3, u64), @Vector(3, u159), .{ 0, 1, 1 << 158 });
+            try testArgs(@Vector(3, u65), @Vector(3, u159), .{ 0, 1, 1 << 158 });
+            try testArgs(@Vector(3, u127), @Vector(3, u159), .{ 0, 1, 1 << 158 });
             try testArgs(@Vector(3, u128), @Vector(3, u159), .{ 0, 1, 1 << 158 });
+            try testArgs(@Vector(3, u129), @Vector(3, u159), .{ 0, 1, 1 << 158 });
+            try testArgs(@Vector(3, u255), @Vector(3, u159), .{ 0, 1, 1 << 158 });
             try testArgs(@Vector(3, u256), @Vector(3, u159), .{ 0, 1, 1 << 158 });
+            try testArgs(@Vector(3, u257), @Vector(3, u159), .{ 0, 1, 1 << 158 });
+            try testArgs(@Vector(3, u511), @Vector(3, u159), .{ 0, 1, 1 << 158 });
             try testArgs(@Vector(3, u512), @Vector(3, u159), .{ 0, 1, 1 << 158 });
+            try testArgs(@Vector(3, u513), @Vector(3, u159), .{ 0, 1, 1 << 158 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u159), .{ 0, 1, 1 << 158 });
             try testArgs(@Vector(3, u1024), @Vector(3, u159), .{ 0, 1, 1 << 158 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u159), .{ 0, 1, 1 << 158 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i160), .{ -1 << 159, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i160), .{ -1 << 159, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i160), .{ -1 << 159, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i160), .{ -1 << 159, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i160), .{ -1 << 159, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i160), .{ -1 << 159, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i160), .{ -1 << 159, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i160), .{ -1 << 159, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i160), .{ -1 << 159, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i160), .{ -1 << 159, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i160), .{ -1 << 159, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i160), .{ -1 << 159, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i160), .{ -1 << 159, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i160), .{ -1 << 159, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i160), .{ -1 << 159, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i160), .{ -1 << 159, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i160), .{ -1 << 159, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i160), .{ -1 << 159, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i160), .{ -1 << 159, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i160), .{ -1 << 159, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i160), .{ -1 << 159, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i160), .{ -1 << 159, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i160), .{ -1 << 159, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i160), .{ -1 << 159, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u160), .{ 0, 1, 1 << 159 });
             try testArgs(@Vector(3, u8), @Vector(3, u160), .{ 0, 1, 1 << 159 });
+            try testArgs(@Vector(3, u9), @Vector(3, u160), .{ 0, 1, 1 << 159 });
+            try testArgs(@Vector(3, u15), @Vector(3, u160), .{ 0, 1, 1 << 159 });
             try testArgs(@Vector(3, u16), @Vector(3, u160), .{ 0, 1, 1 << 159 });
+            try testArgs(@Vector(3, u17), @Vector(3, u160), .{ 0, 1, 1 << 159 });
+            try testArgs(@Vector(3, u31), @Vector(3, u160), .{ 0, 1, 1 << 159 });
             try testArgs(@Vector(3, u32), @Vector(3, u160), .{ 0, 1, 1 << 159 });
+            try testArgs(@Vector(3, u33), @Vector(3, u160), .{ 0, 1, 1 << 159 });
+            try testArgs(@Vector(3, u63), @Vector(3, u160), .{ 0, 1, 1 << 159 });
             try testArgs(@Vector(3, u64), @Vector(3, u160), .{ 0, 1, 1 << 159 });
+            try testArgs(@Vector(3, u65), @Vector(3, u160), .{ 0, 1, 1 << 159 });
+            try testArgs(@Vector(3, u127), @Vector(3, u160), .{ 0, 1, 1 << 159 });
             try testArgs(@Vector(3, u128), @Vector(3, u160), .{ 0, 1, 1 << 159 });
+            try testArgs(@Vector(3, u129), @Vector(3, u160), .{ 0, 1, 1 << 159 });
+            try testArgs(@Vector(3, u255), @Vector(3, u160), .{ 0, 1, 1 << 159 });
             try testArgs(@Vector(3, u256), @Vector(3, u160), .{ 0, 1, 1 << 159 });
+            try testArgs(@Vector(3, u257), @Vector(3, u160), .{ 0, 1, 1 << 159 });
+            try testArgs(@Vector(3, u511), @Vector(3, u160), .{ 0, 1, 1 << 159 });
             try testArgs(@Vector(3, u512), @Vector(3, u160), .{ 0, 1, 1 << 159 });
+            try testArgs(@Vector(3, u513), @Vector(3, u160), .{ 0, 1, 1 << 159 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u160), .{ 0, 1, 1 << 159 });
             try testArgs(@Vector(3, u1024), @Vector(3, u160), .{ 0, 1, 1 << 159 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u160), .{ 0, 1, 1 << 159 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i161), .{ -1 << 160, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i161), .{ -1 << 160, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i161), .{ -1 << 160, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i161), .{ -1 << 160, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i161), .{ -1 << 160, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i161), .{ -1 << 160, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i161), .{ -1 << 160, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i161), .{ -1 << 160, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i161), .{ -1 << 160, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i161), .{ -1 << 160, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i161), .{ -1 << 160, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i161), .{ -1 << 160, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i161), .{ -1 << 160, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i161), .{ -1 << 160, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i161), .{ -1 << 160, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i161), .{ -1 << 160, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i161), .{ -1 << 160, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i161), .{ -1 << 160, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i161), .{ -1 << 160, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i161), .{ -1 << 160, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i161), .{ -1 << 160, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i161), .{ -1 << 160, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i161), .{ -1 << 160, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i161), .{ -1 << 160, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u161), .{ 0, 1, 1 << 160 });
             try testArgs(@Vector(3, u8), @Vector(3, u161), .{ 0, 1, 1 << 160 });
+            try testArgs(@Vector(3, u9), @Vector(3, u161), .{ 0, 1, 1 << 160 });
+            try testArgs(@Vector(3, u15), @Vector(3, u161), .{ 0, 1, 1 << 160 });
             try testArgs(@Vector(3, u16), @Vector(3, u161), .{ 0, 1, 1 << 160 });
+            try testArgs(@Vector(3, u17), @Vector(3, u161), .{ 0, 1, 1 << 160 });
+            try testArgs(@Vector(3, u31), @Vector(3, u161), .{ 0, 1, 1 << 160 });
             try testArgs(@Vector(3, u32), @Vector(3, u161), .{ 0, 1, 1 << 160 });
+            try testArgs(@Vector(3, u33), @Vector(3, u161), .{ 0, 1, 1 << 160 });
+            try testArgs(@Vector(3, u63), @Vector(3, u161), .{ 0, 1, 1 << 160 });
             try testArgs(@Vector(3, u64), @Vector(3, u161), .{ 0, 1, 1 << 160 });
+            try testArgs(@Vector(3, u65), @Vector(3, u161), .{ 0, 1, 1 << 160 });
+            try testArgs(@Vector(3, u127), @Vector(3, u161), .{ 0, 1, 1 << 160 });
             try testArgs(@Vector(3, u128), @Vector(3, u161), .{ 0, 1, 1 << 160 });
+            try testArgs(@Vector(3, u129), @Vector(3, u161), .{ 0, 1, 1 << 160 });
+            try testArgs(@Vector(3, u255), @Vector(3, u161), .{ 0, 1, 1 << 160 });
             try testArgs(@Vector(3, u256), @Vector(3, u161), .{ 0, 1, 1 << 160 });
+            try testArgs(@Vector(3, u257), @Vector(3, u161), .{ 0, 1, 1 << 160 });
+            try testArgs(@Vector(3, u511), @Vector(3, u161), .{ 0, 1, 1 << 160 });
             try testArgs(@Vector(3, u512), @Vector(3, u161), .{ 0, 1, 1 << 160 });
+            try testArgs(@Vector(3, u513), @Vector(3, u161), .{ 0, 1, 1 << 160 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u161), .{ 0, 1, 1 << 160 });
             try testArgs(@Vector(3, u1024), @Vector(3, u161), .{ 0, 1, 1 << 160 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u161), .{ 0, 1, 1 << 160 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i191), .{ -1 << 190, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i191), .{ -1 << 190, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i191), .{ -1 << 190, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i191), .{ -1 << 190, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i191), .{ -1 << 190, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i191), .{ -1 << 190, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i191), .{ -1 << 190, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i191), .{ -1 << 190, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i191), .{ -1 << 190, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i191), .{ -1 << 190, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i191), .{ -1 << 190, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i191), .{ -1 << 190, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i191), .{ -1 << 190, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i191), .{ -1 << 190, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i191), .{ -1 << 190, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i191), .{ -1 << 190, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i191), .{ -1 << 190, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i191), .{ -1 << 190, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i191), .{ -1 << 190, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i191), .{ -1 << 190, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i191), .{ -1 << 190, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i191), .{ -1 << 190, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i191), .{ -1 << 190, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i191), .{ -1 << 190, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u191), .{ 0, 1, 1 << 190 });
             try testArgs(@Vector(3, u8), @Vector(3, u191), .{ 0, 1, 1 << 190 });
+            try testArgs(@Vector(3, u9), @Vector(3, u191), .{ 0, 1, 1 << 190 });
+            try testArgs(@Vector(3, u15), @Vector(3, u191), .{ 0, 1, 1 << 190 });
             try testArgs(@Vector(3, u16), @Vector(3, u191), .{ 0, 1, 1 << 190 });
+            try testArgs(@Vector(3, u17), @Vector(3, u191), .{ 0, 1, 1 << 190 });
+            try testArgs(@Vector(3, u31), @Vector(3, u191), .{ 0, 1, 1 << 190 });
             try testArgs(@Vector(3, u32), @Vector(3, u191), .{ 0, 1, 1 << 190 });
+            try testArgs(@Vector(3, u33), @Vector(3, u191), .{ 0, 1, 1 << 190 });
+            try testArgs(@Vector(3, u63), @Vector(3, u191), .{ 0, 1, 1 << 190 });
             try testArgs(@Vector(3, u64), @Vector(3, u191), .{ 0, 1, 1 << 190 });
+            try testArgs(@Vector(3, u65), @Vector(3, u191), .{ 0, 1, 1 << 190 });
+            try testArgs(@Vector(3, u127), @Vector(3, u191), .{ 0, 1, 1 << 190 });
             try testArgs(@Vector(3, u128), @Vector(3, u191), .{ 0, 1, 1 << 190 });
+            try testArgs(@Vector(3, u129), @Vector(3, u191), .{ 0, 1, 1 << 190 });
+            try testArgs(@Vector(3, u255), @Vector(3, u191), .{ 0, 1, 1 << 190 });
             try testArgs(@Vector(3, u256), @Vector(3, u191), .{ 0, 1, 1 << 190 });
+            try testArgs(@Vector(3, u257), @Vector(3, u191), .{ 0, 1, 1 << 190 });
+            try testArgs(@Vector(3, u511), @Vector(3, u191), .{ 0, 1, 1 << 190 });
             try testArgs(@Vector(3, u512), @Vector(3, u191), .{ 0, 1, 1 << 190 });
+            try testArgs(@Vector(3, u513), @Vector(3, u191), .{ 0, 1, 1 << 190 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u191), .{ 0, 1, 1 << 190 });
             try testArgs(@Vector(3, u1024), @Vector(3, u191), .{ 0, 1, 1 << 190 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u191), .{ 0, 1, 1 << 190 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i192), .{ -1 << 191, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i192), .{ -1 << 191, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i192), .{ -1 << 191, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i192), .{ -1 << 191, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i192), .{ -1 << 191, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i192), .{ -1 << 191, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i192), .{ -1 << 191, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i192), .{ -1 << 191, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i192), .{ -1 << 191, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i192), .{ -1 << 191, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i192), .{ -1 << 191, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i192), .{ -1 << 191, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i192), .{ -1 << 191, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i192), .{ -1 << 191, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i192), .{ -1 << 191, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i192), .{ -1 << 191, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i192), .{ -1 << 191, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i192), .{ -1 << 191, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i192), .{ -1 << 191, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i192), .{ -1 << 191, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i192), .{ -1 << 191, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i192), .{ -1 << 191, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i192), .{ -1 << 191, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i192), .{ -1 << 191, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u192), .{ 0, 1, 1 << 191 });
             try testArgs(@Vector(3, u8), @Vector(3, u192), .{ 0, 1, 1 << 191 });
+            try testArgs(@Vector(3, u9), @Vector(3, u192), .{ 0, 1, 1 << 191 });
+            try testArgs(@Vector(3, u15), @Vector(3, u192), .{ 0, 1, 1 << 191 });
             try testArgs(@Vector(3, u16), @Vector(3, u192), .{ 0, 1, 1 << 191 });
+            try testArgs(@Vector(3, u17), @Vector(3, u192), .{ 0, 1, 1 << 191 });
+            try testArgs(@Vector(3, u31), @Vector(3, u192), .{ 0, 1, 1 << 191 });
             try testArgs(@Vector(3, u32), @Vector(3, u192), .{ 0, 1, 1 << 191 });
+            try testArgs(@Vector(3, u33), @Vector(3, u192), .{ 0, 1, 1 << 191 });
+            try testArgs(@Vector(3, u63), @Vector(3, u192), .{ 0, 1, 1 << 191 });
             try testArgs(@Vector(3, u64), @Vector(3, u192), .{ 0, 1, 1 << 191 });
+            try testArgs(@Vector(3, u65), @Vector(3, u192), .{ 0, 1, 1 << 191 });
+            try testArgs(@Vector(3, u127), @Vector(3, u192), .{ 0, 1, 1 << 191 });
             try testArgs(@Vector(3, u128), @Vector(3, u192), .{ 0, 1, 1 << 191 });
+            try testArgs(@Vector(3, u129), @Vector(3, u192), .{ 0, 1, 1 << 191 });
+            try testArgs(@Vector(3, u255), @Vector(3, u192), .{ 0, 1, 1 << 191 });
             try testArgs(@Vector(3, u256), @Vector(3, u192), .{ 0, 1, 1 << 191 });
+            try testArgs(@Vector(3, u257), @Vector(3, u192), .{ 0, 1, 1 << 191 });
+            try testArgs(@Vector(3, u511), @Vector(3, u192), .{ 0, 1, 1 << 191 });
             try testArgs(@Vector(3, u512), @Vector(3, u192), .{ 0, 1, 1 << 191 });
+            try testArgs(@Vector(3, u513), @Vector(3, u192), .{ 0, 1, 1 << 191 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u192), .{ 0, 1, 1 << 191 });
             try testArgs(@Vector(3, u1024), @Vector(3, u192), .{ 0, 1, 1 << 191 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u192), .{ 0, 1, 1 << 191 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i193), .{ -1 << 192, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i193), .{ -1 << 192, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i193), .{ -1 << 192, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i193), .{ -1 << 192, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i193), .{ -1 << 192, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i193), .{ -1 << 192, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i193), .{ -1 << 192, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i193), .{ -1 << 192, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i193), .{ -1 << 192, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i193), .{ -1 << 192, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i193), .{ -1 << 192, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i193), .{ -1 << 192, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i193), .{ -1 << 192, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i193), .{ -1 << 192, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i193), .{ -1 << 192, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i193), .{ -1 << 192, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i193), .{ -1 << 192, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i193), .{ -1 << 192, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i193), .{ -1 << 192, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i193), .{ -1 << 192, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i193), .{ -1 << 192, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i193), .{ -1 << 192, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i193), .{ -1 << 192, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i193), .{ -1 << 192, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u193), .{ 0, 1, 1 << 192 });
             try testArgs(@Vector(3, u8), @Vector(3, u193), .{ 0, 1, 1 << 192 });
+            try testArgs(@Vector(3, u9), @Vector(3, u193), .{ 0, 1, 1 << 192 });
+            try testArgs(@Vector(3, u15), @Vector(3, u193), .{ 0, 1, 1 << 192 });
             try testArgs(@Vector(3, u16), @Vector(3, u193), .{ 0, 1, 1 << 192 });
+            try testArgs(@Vector(3, u17), @Vector(3, u193), .{ 0, 1, 1 << 192 });
+            try testArgs(@Vector(3, u31), @Vector(3, u193), .{ 0, 1, 1 << 192 });
             try testArgs(@Vector(3, u32), @Vector(3, u193), .{ 0, 1, 1 << 192 });
+            try testArgs(@Vector(3, u33), @Vector(3, u193), .{ 0, 1, 1 << 192 });
+            try testArgs(@Vector(3, u63), @Vector(3, u193), .{ 0, 1, 1 << 192 });
             try testArgs(@Vector(3, u64), @Vector(3, u193), .{ 0, 1, 1 << 192 });
+            try testArgs(@Vector(3, u65), @Vector(3, u193), .{ 0, 1, 1 << 192 });
+            try testArgs(@Vector(3, u127), @Vector(3, u193), .{ 0, 1, 1 << 192 });
             try testArgs(@Vector(3, u128), @Vector(3, u193), .{ 0, 1, 1 << 192 });
+            try testArgs(@Vector(3, u129), @Vector(3, u193), .{ 0, 1, 1 << 192 });
+            try testArgs(@Vector(3, u255), @Vector(3, u193), .{ 0, 1, 1 << 192 });
             try testArgs(@Vector(3, u256), @Vector(3, u193), .{ 0, 1, 1 << 192 });
+            try testArgs(@Vector(3, u257), @Vector(3, u193), .{ 0, 1, 1 << 192 });
+            try testArgs(@Vector(3, u511), @Vector(3, u193), .{ 0, 1, 1 << 192 });
             try testArgs(@Vector(3, u512), @Vector(3, u193), .{ 0, 1, 1 << 192 });
+            try testArgs(@Vector(3, u513), @Vector(3, u193), .{ 0, 1, 1 << 192 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u193), .{ 0, 1, 1 << 192 });
             try testArgs(@Vector(3, u1024), @Vector(3, u193), .{ 0, 1, 1 << 192 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u193), .{ 0, 1, 1 << 192 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i223), .{ -1 << 222, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i223), .{ -1 << 222, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i223), .{ -1 << 222, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i223), .{ -1 << 222, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i223), .{ -1 << 222, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i223), .{ -1 << 222, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i223), .{ -1 << 222, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i223), .{ -1 << 222, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i223), .{ -1 << 222, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i223), .{ -1 << 222, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i223), .{ -1 << 222, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i223), .{ -1 << 222, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i223), .{ -1 << 222, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i223), .{ -1 << 222, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i223), .{ -1 << 222, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i223), .{ -1 << 222, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i223), .{ -1 << 222, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i223), .{ -1 << 222, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i223), .{ -1 << 222, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i223), .{ -1 << 222, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i223), .{ -1 << 222, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i223), .{ -1 << 222, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i223), .{ -1 << 222, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i223), .{ -1 << 222, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u223), .{ 0, 1, 1 << 222 });
             try testArgs(@Vector(3, u8), @Vector(3, u223), .{ 0, 1, 1 << 222 });
+            try testArgs(@Vector(3, u9), @Vector(3, u223), .{ 0, 1, 1 << 222 });
+            try testArgs(@Vector(3, u15), @Vector(3, u223), .{ 0, 1, 1 << 222 });
             try testArgs(@Vector(3, u16), @Vector(3, u223), .{ 0, 1, 1 << 222 });
+            try testArgs(@Vector(3, u17), @Vector(3, u223), .{ 0, 1, 1 << 222 });
+            try testArgs(@Vector(3, u31), @Vector(3, u223), .{ 0, 1, 1 << 222 });
             try testArgs(@Vector(3, u32), @Vector(3, u223), .{ 0, 1, 1 << 222 });
+            try testArgs(@Vector(3, u33), @Vector(3, u223), .{ 0, 1, 1 << 222 });
+            try testArgs(@Vector(3, u63), @Vector(3, u223), .{ 0, 1, 1 << 222 });
             try testArgs(@Vector(3, u64), @Vector(3, u223), .{ 0, 1, 1 << 222 });
+            try testArgs(@Vector(3, u65), @Vector(3, u223), .{ 0, 1, 1 << 222 });
+            try testArgs(@Vector(3, u127), @Vector(3, u223), .{ 0, 1, 1 << 222 });
             try testArgs(@Vector(3, u128), @Vector(3, u223), .{ 0, 1, 1 << 222 });
+            try testArgs(@Vector(3, u129), @Vector(3, u223), .{ 0, 1, 1 << 222 });
+            try testArgs(@Vector(3, u255), @Vector(3, u223), .{ 0, 1, 1 << 222 });
             try testArgs(@Vector(3, u256), @Vector(3, u223), .{ 0, 1, 1 << 222 });
+            try testArgs(@Vector(3, u257), @Vector(3, u223), .{ 0, 1, 1 << 222 });
+            try testArgs(@Vector(3, u511), @Vector(3, u223), .{ 0, 1, 1 << 222 });
             try testArgs(@Vector(3, u512), @Vector(3, u223), .{ 0, 1, 1 << 222 });
+            try testArgs(@Vector(3, u513), @Vector(3, u223), .{ 0, 1, 1 << 222 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u223), .{ 0, 1, 1 << 222 });
             try testArgs(@Vector(3, u1024), @Vector(3, u223), .{ 0, 1, 1 << 222 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u223), .{ 0, 1, 1 << 222 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i224), .{ -1 << 223, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i224), .{ -1 << 223, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i224), .{ -1 << 223, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i224), .{ -1 << 223, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i224), .{ -1 << 223, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i224), .{ -1 << 223, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i224), .{ -1 << 223, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i224), .{ -1 << 223, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i224), .{ -1 << 223, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i224), .{ -1 << 223, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i224), .{ -1 << 223, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i224), .{ -1 << 223, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i224), .{ -1 << 223, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i224), .{ -1 << 223, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i224), .{ -1 << 223, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i224), .{ -1 << 223, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i224), .{ -1 << 223, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i224), .{ -1 << 223, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i224), .{ -1 << 223, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i224), .{ -1 << 223, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i224), .{ -1 << 223, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i224), .{ -1 << 223, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i224), .{ -1 << 223, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i224), .{ -1 << 223, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u224), .{ 0, 1, 1 << 223 });
             try testArgs(@Vector(3, u8), @Vector(3, u224), .{ 0, 1, 1 << 223 });
+            try testArgs(@Vector(3, u9), @Vector(3, u224), .{ 0, 1, 1 << 223 });
+            try testArgs(@Vector(3, u15), @Vector(3, u224), .{ 0, 1, 1 << 223 });
             try testArgs(@Vector(3, u16), @Vector(3, u224), .{ 0, 1, 1 << 223 });
+            try testArgs(@Vector(3, u17), @Vector(3, u224), .{ 0, 1, 1 << 223 });
+            try testArgs(@Vector(3, u31), @Vector(3, u224), .{ 0, 1, 1 << 223 });
             try testArgs(@Vector(3, u32), @Vector(3, u224), .{ 0, 1, 1 << 223 });
+            try testArgs(@Vector(3, u33), @Vector(3, u224), .{ 0, 1, 1 << 223 });
+            try testArgs(@Vector(3, u63), @Vector(3, u224), .{ 0, 1, 1 << 223 });
             try testArgs(@Vector(3, u64), @Vector(3, u224), .{ 0, 1, 1 << 223 });
+            try testArgs(@Vector(3, u65), @Vector(3, u224), .{ 0, 1, 1 << 223 });
+            try testArgs(@Vector(3, u127), @Vector(3, u224), .{ 0, 1, 1 << 223 });
             try testArgs(@Vector(3, u128), @Vector(3, u224), .{ 0, 1, 1 << 223 });
+            try testArgs(@Vector(3, u129), @Vector(3, u224), .{ 0, 1, 1 << 223 });
+            try testArgs(@Vector(3, u255), @Vector(3, u224), .{ 0, 1, 1 << 223 });
             try testArgs(@Vector(3, u256), @Vector(3, u224), .{ 0, 1, 1 << 223 });
+            try testArgs(@Vector(3, u257), @Vector(3, u224), .{ 0, 1, 1 << 223 });
+            try testArgs(@Vector(3, u511), @Vector(3, u224), .{ 0, 1, 1 << 223 });
             try testArgs(@Vector(3, u512), @Vector(3, u224), .{ 0, 1, 1 << 223 });
+            try testArgs(@Vector(3, u513), @Vector(3, u224), .{ 0, 1, 1 << 223 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u224), .{ 0, 1, 1 << 223 });
             try testArgs(@Vector(3, u1024), @Vector(3, u224), .{ 0, 1, 1 << 223 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u224), .{ 0, 1, 1 << 223 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i225), .{ -1 << 224, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i225), .{ -1 << 224, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i225), .{ -1 << 224, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i225), .{ -1 << 224, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i225), .{ -1 << 224, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i225), .{ -1 << 224, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i225), .{ -1 << 224, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i225), .{ -1 << 224, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i225), .{ -1 << 224, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i225), .{ -1 << 224, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i225), .{ -1 << 224, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i225), .{ -1 << 224, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i225), .{ -1 << 224, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i225), .{ -1 << 224, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i225), .{ -1 << 224, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i225), .{ -1 << 224, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i225), .{ -1 << 224, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i225), .{ -1 << 224, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i225), .{ -1 << 224, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i225), .{ -1 << 224, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i225), .{ -1 << 224, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i225), .{ -1 << 224, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i225), .{ -1 << 224, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i225), .{ -1 << 224, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u225), .{ 0, 1, 1 << 224 });
             try testArgs(@Vector(3, u8), @Vector(3, u225), .{ 0, 1, 1 << 224 });
+            try testArgs(@Vector(3, u9), @Vector(3, u225), .{ 0, 1, 1 << 224 });
+            try testArgs(@Vector(3, u15), @Vector(3, u225), .{ 0, 1, 1 << 224 });
             try testArgs(@Vector(3, u16), @Vector(3, u225), .{ 0, 1, 1 << 224 });
+            try testArgs(@Vector(3, u17), @Vector(3, u225), .{ 0, 1, 1 << 224 });
+            try testArgs(@Vector(3, u31), @Vector(3, u225), .{ 0, 1, 1 << 224 });
             try testArgs(@Vector(3, u32), @Vector(3, u225), .{ 0, 1, 1 << 224 });
+            try testArgs(@Vector(3, u33), @Vector(3, u225), .{ 0, 1, 1 << 224 });
+            try testArgs(@Vector(3, u63), @Vector(3, u225), .{ 0, 1, 1 << 224 });
             try testArgs(@Vector(3, u64), @Vector(3, u225), .{ 0, 1, 1 << 224 });
+            try testArgs(@Vector(3, u65), @Vector(3, u225), .{ 0, 1, 1 << 224 });
+            try testArgs(@Vector(3, u127), @Vector(3, u225), .{ 0, 1, 1 << 224 });
             try testArgs(@Vector(3, u128), @Vector(3, u225), .{ 0, 1, 1 << 224 });
+            try testArgs(@Vector(3, u129), @Vector(3, u225), .{ 0, 1, 1 << 224 });
+            try testArgs(@Vector(3, u255), @Vector(3, u225), .{ 0, 1, 1 << 224 });
             try testArgs(@Vector(3, u256), @Vector(3, u225), .{ 0, 1, 1 << 224 });
+            try testArgs(@Vector(3, u257), @Vector(3, u225), .{ 0, 1, 1 << 224 });
+            try testArgs(@Vector(3, u511), @Vector(3, u225), .{ 0, 1, 1 << 224 });
             try testArgs(@Vector(3, u512), @Vector(3, u225), .{ 0, 1, 1 << 224 });
+            try testArgs(@Vector(3, u513), @Vector(3, u225), .{ 0, 1, 1 << 224 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u225), .{ 0, 1, 1 << 224 });
             try testArgs(@Vector(3, u1024), @Vector(3, u225), .{ 0, 1, 1 << 224 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u225), .{ 0, 1, 1 << 224 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i255), .{ -1 << 254, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i255), .{ -1 << 254, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i255), .{ -1 << 254, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i255), .{ -1 << 254, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i255), .{ -1 << 254, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i255), .{ -1 << 254, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i255), .{ -1 << 254, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i255), .{ -1 << 254, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i255), .{ -1 << 254, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i255), .{ -1 << 254, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i255), .{ -1 << 254, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i255), .{ -1 << 254, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i255), .{ -1 << 254, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i255), .{ -1 << 254, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i255), .{ -1 << 254, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i255), .{ -1 << 254, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i255), .{ -1 << 254, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i255), .{ -1 << 254, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i255), .{ -1 << 254, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i255), .{ -1 << 254, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i255), .{ -1 << 254, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i255), .{ -1 << 254, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i255), .{ -1 << 254, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i255), .{ -1 << 254, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u255), .{ 0, 1, 1 << 254 });
             try testArgs(@Vector(3, u8), @Vector(3, u255), .{ 0, 1, 1 << 254 });
+            try testArgs(@Vector(3, u9), @Vector(3, u255), .{ 0, 1, 1 << 254 });
+            try testArgs(@Vector(3, u15), @Vector(3, u255), .{ 0, 1, 1 << 254 });
             try testArgs(@Vector(3, u16), @Vector(3, u255), .{ 0, 1, 1 << 254 });
+            try testArgs(@Vector(3, u17), @Vector(3, u255), .{ 0, 1, 1 << 254 });
+            try testArgs(@Vector(3, u31), @Vector(3, u255), .{ 0, 1, 1 << 254 });
             try testArgs(@Vector(3, u32), @Vector(3, u255), .{ 0, 1, 1 << 254 });
+            try testArgs(@Vector(3, u33), @Vector(3, u255), .{ 0, 1, 1 << 254 });
+            try testArgs(@Vector(3, u63), @Vector(3, u255), .{ 0, 1, 1 << 254 });
             try testArgs(@Vector(3, u64), @Vector(3, u255), .{ 0, 1, 1 << 254 });
+            try testArgs(@Vector(3, u65), @Vector(3, u255), .{ 0, 1, 1 << 254 });
+            try testArgs(@Vector(3, u127), @Vector(3, u255), .{ 0, 1, 1 << 254 });
             try testArgs(@Vector(3, u128), @Vector(3, u255), .{ 0, 1, 1 << 254 });
+            try testArgs(@Vector(3, u129), @Vector(3, u255), .{ 0, 1, 1 << 254 });
+            try testArgs(@Vector(3, u255), @Vector(3, u255), .{ 0, 1, 1 << 254 });
             try testArgs(@Vector(3, u256), @Vector(3, u255), .{ 0, 1, 1 << 254 });
+            try testArgs(@Vector(3, u257), @Vector(3, u255), .{ 0, 1, 1 << 254 });
+            try testArgs(@Vector(3, u511), @Vector(3, u255), .{ 0, 1, 1 << 254 });
             try testArgs(@Vector(3, u512), @Vector(3, u255), .{ 0, 1, 1 << 254 });
+            try testArgs(@Vector(3, u513), @Vector(3, u255), .{ 0, 1, 1 << 254 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u255), .{ 0, 1, 1 << 254 });
             try testArgs(@Vector(3, u1024), @Vector(3, u255), .{ 0, 1, 1 << 254 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u255), .{ 0, 1, 1 << 254 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i256), .{ -1 << 255, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i256), .{ -1 << 255, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i256), .{ -1 << 255, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i256), .{ -1 << 255, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i256), .{ -1 << 255, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i256), .{ -1 << 255, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i256), .{ -1 << 255, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i256), .{ -1 << 255, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i256), .{ -1 << 255, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i256), .{ -1 << 255, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i256), .{ -1 << 255, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i256), .{ -1 << 255, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i256), .{ -1 << 255, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i256), .{ -1 << 255, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i256), .{ -1 << 255, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i256), .{ -1 << 255, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i256), .{ -1 << 255, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i256), .{ -1 << 255, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i256), .{ -1 << 255, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i256), .{ -1 << 255, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i256), .{ -1 << 255, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i256), .{ -1 << 255, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i256), .{ -1 << 255, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i256), .{ -1 << 255, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u256), .{ 0, 1, 1 << 255 });
             try testArgs(@Vector(3, u8), @Vector(3, u256), .{ 0, 1, 1 << 255 });
+            try testArgs(@Vector(3, u9), @Vector(3, u256), .{ 0, 1, 1 << 255 });
+            try testArgs(@Vector(3, u15), @Vector(3, u256), .{ 0, 1, 1 << 255 });
             try testArgs(@Vector(3, u16), @Vector(3, u256), .{ 0, 1, 1 << 255 });
+            try testArgs(@Vector(3, u17), @Vector(3, u256), .{ 0, 1, 1 << 255 });
+            try testArgs(@Vector(3, u31), @Vector(3, u256), .{ 0, 1, 1 << 255 });
             try testArgs(@Vector(3, u32), @Vector(3, u256), .{ 0, 1, 1 << 255 });
+            try testArgs(@Vector(3, u33), @Vector(3, u256), .{ 0, 1, 1 << 255 });
+            try testArgs(@Vector(3, u63), @Vector(3, u256), .{ 0, 1, 1 << 255 });
             try testArgs(@Vector(3, u64), @Vector(3, u256), .{ 0, 1, 1 << 255 });
+            try testArgs(@Vector(3, u65), @Vector(3, u256), .{ 0, 1, 1 << 255 });
+            try testArgs(@Vector(3, u127), @Vector(3, u256), .{ 0, 1, 1 << 255 });
             try testArgs(@Vector(3, u128), @Vector(3, u256), .{ 0, 1, 1 << 255 });
+            try testArgs(@Vector(3, u129), @Vector(3, u256), .{ 0, 1, 1 << 255 });
+            try testArgs(@Vector(3, u255), @Vector(3, u256), .{ 0, 1, 1 << 255 });
             try testArgs(@Vector(3, u256), @Vector(3, u256), .{ 0, 1, 1 << 255 });
+            try testArgs(@Vector(3, u257), @Vector(3, u256), .{ 0, 1, 1 << 255 });
+            try testArgs(@Vector(3, u511), @Vector(3, u256), .{ 0, 1, 1 << 255 });
             try testArgs(@Vector(3, u512), @Vector(3, u256), .{ 0, 1, 1 << 255 });
+            try testArgs(@Vector(3, u513), @Vector(3, u256), .{ 0, 1, 1 << 255 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u256), .{ 0, 1, 1 << 255 });
             try testArgs(@Vector(3, u1024), @Vector(3, u256), .{ 0, 1, 1 << 255 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u256), .{ 0, 1, 1 << 255 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i257), .{ -1 << 256, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i257), .{ -1 << 256, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i257), .{ -1 << 256, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i257), .{ -1 << 256, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i257), .{ -1 << 256, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i257), .{ -1 << 256, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i257), .{ -1 << 256, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i257), .{ -1 << 256, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i257), .{ -1 << 256, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i257), .{ -1 << 256, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i257), .{ -1 << 256, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i257), .{ -1 << 256, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i257), .{ -1 << 256, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i257), .{ -1 << 256, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i257), .{ -1 << 256, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i257), .{ -1 << 256, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i257), .{ -1 << 256, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i257), .{ -1 << 256, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i257), .{ -1 << 256, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i257), .{ -1 << 256, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i257), .{ -1 << 256, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i257), .{ -1 << 256, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i257), .{ -1 << 256, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i257), .{ -1 << 256, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u257), .{ 0, 1, 1 << 256 });
             try testArgs(@Vector(3, u8), @Vector(3, u257), .{ 0, 1, 1 << 256 });
+            try testArgs(@Vector(3, u9), @Vector(3, u257), .{ 0, 1, 1 << 256 });
+            try testArgs(@Vector(3, u15), @Vector(3, u257), .{ 0, 1, 1 << 256 });
             try testArgs(@Vector(3, u16), @Vector(3, u257), .{ 0, 1, 1 << 256 });
+            try testArgs(@Vector(3, u17), @Vector(3, u257), .{ 0, 1, 1 << 256 });
+            try testArgs(@Vector(3, u31), @Vector(3, u257), .{ 0, 1, 1 << 256 });
             try testArgs(@Vector(3, u32), @Vector(3, u257), .{ 0, 1, 1 << 256 });
+            try testArgs(@Vector(3, u33), @Vector(3, u257), .{ 0, 1, 1 << 256 });
+            try testArgs(@Vector(3, u63), @Vector(3, u257), .{ 0, 1, 1 << 256 });
             try testArgs(@Vector(3, u64), @Vector(3, u257), .{ 0, 1, 1 << 256 });
+            try testArgs(@Vector(3, u65), @Vector(3, u257), .{ 0, 1, 1 << 256 });
+            try testArgs(@Vector(3, u127), @Vector(3, u257), .{ 0, 1, 1 << 256 });
             try testArgs(@Vector(3, u128), @Vector(3, u257), .{ 0, 1, 1 << 256 });
+            try testArgs(@Vector(3, u129), @Vector(3, u257), .{ 0, 1, 1 << 256 });
+            try testArgs(@Vector(3, u255), @Vector(3, u257), .{ 0, 1, 1 << 256 });
             try testArgs(@Vector(3, u256), @Vector(3, u257), .{ 0, 1, 1 << 256 });
+            try testArgs(@Vector(3, u257), @Vector(3, u257), .{ 0, 1, 1 << 256 });
+            try testArgs(@Vector(3, u511), @Vector(3, u257), .{ 0, 1, 1 << 256 });
             try testArgs(@Vector(3, u512), @Vector(3, u257), .{ 0, 1, 1 << 256 });
+            try testArgs(@Vector(3, u513), @Vector(3, u257), .{ 0, 1, 1 << 256 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u257), .{ 0, 1, 1 << 256 });
             try testArgs(@Vector(3, u1024), @Vector(3, u257), .{ 0, 1, 1 << 256 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u257), .{ 0, 1, 1 << 256 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i511), .{ -1 << 510, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i511), .{ -1 << 510, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i511), .{ -1 << 510, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i511), .{ -1 << 510, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i511), .{ -1 << 510, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i511), .{ -1 << 510, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i511), .{ -1 << 510, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i511), .{ -1 << 510, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i511), .{ -1 << 510, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i511), .{ -1 << 510, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i511), .{ -1 << 510, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i511), .{ -1 << 510, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i511), .{ -1 << 510, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i511), .{ -1 << 510, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i511), .{ -1 << 510, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i511), .{ -1 << 510, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i511), .{ -1 << 510, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i511), .{ -1 << 510, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i511), .{ -1 << 510, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i511), .{ -1 << 510, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i511), .{ -1 << 510, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i511), .{ -1 << 510, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i511), .{ -1 << 510, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i511), .{ -1 << 510, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u511), .{ 0, 1, 1 << 510 });
             try testArgs(@Vector(3, u8), @Vector(3, u511), .{ 0, 1, 1 << 510 });
+            try testArgs(@Vector(3, u9), @Vector(3, u511), .{ 0, 1, 1 << 510 });
+            try testArgs(@Vector(3, u15), @Vector(3, u511), .{ 0, 1, 1 << 510 });
             try testArgs(@Vector(3, u16), @Vector(3, u511), .{ 0, 1, 1 << 510 });
+            try testArgs(@Vector(3, u17), @Vector(3, u511), .{ 0, 1, 1 << 510 });
+            try testArgs(@Vector(3, u31), @Vector(3, u511), .{ 0, 1, 1 << 510 });
             try testArgs(@Vector(3, u32), @Vector(3, u511), .{ 0, 1, 1 << 510 });
+            try testArgs(@Vector(3, u33), @Vector(3, u511), .{ 0, 1, 1 << 510 });
+            try testArgs(@Vector(3, u63), @Vector(3, u511), .{ 0, 1, 1 << 510 });
             try testArgs(@Vector(3, u64), @Vector(3, u511), .{ 0, 1, 1 << 510 });
+            try testArgs(@Vector(3, u65), @Vector(3, u511), .{ 0, 1, 1 << 510 });
+            try testArgs(@Vector(3, u127), @Vector(3, u511), .{ 0, 1, 1 << 510 });
             try testArgs(@Vector(3, u128), @Vector(3, u511), .{ 0, 1, 1 << 510 });
+            try testArgs(@Vector(3, u129), @Vector(3, u511), .{ 0, 1, 1 << 510 });
+            try testArgs(@Vector(3, u255), @Vector(3, u511), .{ 0, 1, 1 << 510 });
             try testArgs(@Vector(3, u256), @Vector(3, u511), .{ 0, 1, 1 << 510 });
+            try testArgs(@Vector(3, u257), @Vector(3, u511), .{ 0, 1, 1 << 510 });
+            try testArgs(@Vector(3, u511), @Vector(3, u511), .{ 0, 1, 1 << 510 });
             try testArgs(@Vector(3, u512), @Vector(3, u511), .{ 0, 1, 1 << 510 });
+            try testArgs(@Vector(3, u513), @Vector(3, u511), .{ 0, 1, 1 << 510 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u511), .{ 0, 1, 1 << 510 });
             try testArgs(@Vector(3, u1024), @Vector(3, u511), .{ 0, 1, 1 << 510 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u511), .{ 0, 1, 1 << 510 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i512), .{ -1 << 511, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i512), .{ -1 << 511, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i512), .{ -1 << 511, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i512), .{ -1 << 511, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i512), .{ -1 << 511, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i512), .{ -1 << 511, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i512), .{ -1 << 511, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i512), .{ -1 << 511, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i512), .{ -1 << 511, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i512), .{ -1 << 511, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i512), .{ -1 << 511, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i512), .{ -1 << 511, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i512), .{ -1 << 511, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i512), .{ -1 << 511, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i512), .{ -1 << 511, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i512), .{ -1 << 511, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i512), .{ -1 << 511, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i512), .{ -1 << 511, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i512), .{ -1 << 511, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i512), .{ -1 << 511, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i512), .{ -1 << 511, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i512), .{ -1 << 511, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i512), .{ -1 << 511, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i512), .{ -1 << 511, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u512), .{ 0, 1, 1 << 511 });
             try testArgs(@Vector(3, u8), @Vector(3, u512), .{ 0, 1, 1 << 511 });
+            try testArgs(@Vector(3, u9), @Vector(3, u512), .{ 0, 1, 1 << 511 });
+            try testArgs(@Vector(3, u15), @Vector(3, u512), .{ 0, 1, 1 << 511 });
             try testArgs(@Vector(3, u16), @Vector(3, u512), .{ 0, 1, 1 << 511 });
+            try testArgs(@Vector(3, u17), @Vector(3, u512), .{ 0, 1, 1 << 511 });
+            try testArgs(@Vector(3, u31), @Vector(3, u512), .{ 0, 1, 1 << 511 });
             try testArgs(@Vector(3, u32), @Vector(3, u512), .{ 0, 1, 1 << 511 });
+            try testArgs(@Vector(3, u33), @Vector(3, u512), .{ 0, 1, 1 << 511 });
+            try testArgs(@Vector(3, u63), @Vector(3, u512), .{ 0, 1, 1 << 511 });
             try testArgs(@Vector(3, u64), @Vector(3, u512), .{ 0, 1, 1 << 511 });
+            try testArgs(@Vector(3, u65), @Vector(3, u512), .{ 0, 1, 1 << 511 });
+            try testArgs(@Vector(3, u127), @Vector(3, u512), .{ 0, 1, 1 << 511 });
             try testArgs(@Vector(3, u128), @Vector(3, u512), .{ 0, 1, 1 << 511 });
+            try testArgs(@Vector(3, u129), @Vector(3, u512), .{ 0, 1, 1 << 511 });
+            try testArgs(@Vector(3, u255), @Vector(3, u512), .{ 0, 1, 1 << 511 });
             try testArgs(@Vector(3, u256), @Vector(3, u512), .{ 0, 1, 1 << 511 });
+            try testArgs(@Vector(3, u257), @Vector(3, u512), .{ 0, 1, 1 << 511 });
+            try testArgs(@Vector(3, u511), @Vector(3, u512), .{ 0, 1, 1 << 511 });
             try testArgs(@Vector(3, u512), @Vector(3, u512), .{ 0, 1, 1 << 511 });
+            try testArgs(@Vector(3, u513), @Vector(3, u512), .{ 0, 1, 1 << 511 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u512), .{ 0, 1, 1 << 511 });
             try testArgs(@Vector(3, u1024), @Vector(3, u512), .{ 0, 1, 1 << 511 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u512), .{ 0, 1, 1 << 511 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i513), .{ -1 << 512, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i513), .{ -1 << 512, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i513), .{ -1 << 512, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i513), .{ -1 << 512, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i513), .{ -1 << 512, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i513), .{ -1 << 512, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i513), .{ -1 << 512, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i513), .{ -1 << 512, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i513), .{ -1 << 512, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i513), .{ -1 << 512, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i513), .{ -1 << 512, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i513), .{ -1 << 512, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i513), .{ -1 << 512, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i513), .{ -1 << 512, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i513), .{ -1 << 512, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i513), .{ -1 << 512, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i513), .{ -1 << 512, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i513), .{ -1 << 512, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i513), .{ -1 << 512, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i513), .{ -1 << 512, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i513), .{ -1 << 512, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i513), .{ -1 << 512, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i513), .{ -1 << 512, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i513), .{ -1 << 512, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u513), .{ 0, 1, 1 << 512 });
             try testArgs(@Vector(3, u8), @Vector(3, u513), .{ 0, 1, 1 << 512 });
+            try testArgs(@Vector(3, u9), @Vector(3, u513), .{ 0, 1, 1 << 512 });
+            try testArgs(@Vector(3, u15), @Vector(3, u513), .{ 0, 1, 1 << 512 });
             try testArgs(@Vector(3, u16), @Vector(3, u513), .{ 0, 1, 1 << 512 });
+            try testArgs(@Vector(3, u17), @Vector(3, u513), .{ 0, 1, 1 << 512 });
+            try testArgs(@Vector(3, u31), @Vector(3, u513), .{ 0, 1, 1 << 512 });
             try testArgs(@Vector(3, u32), @Vector(3, u513), .{ 0, 1, 1 << 512 });
+            try testArgs(@Vector(3, u33), @Vector(3, u513), .{ 0, 1, 1 << 512 });
+            try testArgs(@Vector(3, u63), @Vector(3, u513), .{ 0, 1, 1 << 512 });
             try testArgs(@Vector(3, u64), @Vector(3, u513), .{ 0, 1, 1 << 512 });
+            try testArgs(@Vector(3, u65), @Vector(3, u513), .{ 0, 1, 1 << 512 });
+            try testArgs(@Vector(3, u127), @Vector(3, u513), .{ 0, 1, 1 << 512 });
             try testArgs(@Vector(3, u128), @Vector(3, u513), .{ 0, 1, 1 << 512 });
+            try testArgs(@Vector(3, u129), @Vector(3, u513), .{ 0, 1, 1 << 512 });
+            try testArgs(@Vector(3, u255), @Vector(3, u513), .{ 0, 1, 1 << 512 });
             try testArgs(@Vector(3, u256), @Vector(3, u513), .{ 0, 1, 1 << 512 });
+            try testArgs(@Vector(3, u257), @Vector(3, u513), .{ 0, 1, 1 << 512 });
+            try testArgs(@Vector(3, u511), @Vector(3, u513), .{ 0, 1, 1 << 512 });
             try testArgs(@Vector(3, u512), @Vector(3, u513), .{ 0, 1, 1 << 512 });
+            try testArgs(@Vector(3, u513), @Vector(3, u513), .{ 0, 1, 1 << 512 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u513), .{ 0, 1, 1 << 512 });
             try testArgs(@Vector(3, u1024), @Vector(3, u513), .{ 0, 1, 1 << 512 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u513), .{ 0, 1, 1 << 512 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i1023), .{ -1 << 1022, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
             try testArgs(@Vector(3, u8), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
+            try testArgs(@Vector(3, u9), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
+            try testArgs(@Vector(3, u15), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
             try testArgs(@Vector(3, u16), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
+            try testArgs(@Vector(3, u17), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
+            try testArgs(@Vector(3, u31), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
             try testArgs(@Vector(3, u32), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
+            try testArgs(@Vector(3, u33), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
+            try testArgs(@Vector(3, u63), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
             try testArgs(@Vector(3, u64), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
+            try testArgs(@Vector(3, u65), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
+            try testArgs(@Vector(3, u127), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
             try testArgs(@Vector(3, u128), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
+            try testArgs(@Vector(3, u129), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
+            try testArgs(@Vector(3, u255), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
             try testArgs(@Vector(3, u256), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
+            try testArgs(@Vector(3, u257), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
+            try testArgs(@Vector(3, u511), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
             try testArgs(@Vector(3, u512), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
+            try testArgs(@Vector(3, u513), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
             try testArgs(@Vector(3, u1024), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u1023), .{ 0, 1, 1 << 1022 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i1024), .{ -1 << 1023, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
             try testArgs(@Vector(3, u8), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
+            try testArgs(@Vector(3, u9), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
+            try testArgs(@Vector(3, u15), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
             try testArgs(@Vector(3, u16), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
+            try testArgs(@Vector(3, u17), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
+            try testArgs(@Vector(3, u31), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
             try testArgs(@Vector(3, u32), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
+            try testArgs(@Vector(3, u33), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
+            try testArgs(@Vector(3, u63), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
             try testArgs(@Vector(3, u64), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
+            try testArgs(@Vector(3, u65), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
+            try testArgs(@Vector(3, u127), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
             try testArgs(@Vector(3, u128), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
+            try testArgs(@Vector(3, u129), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
+            try testArgs(@Vector(3, u255), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
             try testArgs(@Vector(3, u256), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
+            try testArgs(@Vector(3, u257), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
+            try testArgs(@Vector(3, u511), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
             try testArgs(@Vector(3, u512), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
+            try testArgs(@Vector(3, u513), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
             try testArgs(@Vector(3, u1024), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u1024), .{ 0, 1, 1 << 1023 });
 
+            try testArgs(@Vector(3, i7), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
             try testArgs(@Vector(3, i8), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+            try testArgs(@Vector(3, i9), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+            try testArgs(@Vector(3, i15), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
             try testArgs(@Vector(3, i16), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+            try testArgs(@Vector(3, i17), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+            try testArgs(@Vector(3, i31), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
             try testArgs(@Vector(3, i32), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+            try testArgs(@Vector(3, i33), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+            try testArgs(@Vector(3, i63), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
             try testArgs(@Vector(3, i64), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+            try testArgs(@Vector(3, i65), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+            try testArgs(@Vector(3, i127), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
             try testArgs(@Vector(3, i128), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+            try testArgs(@Vector(3, i129), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+            try testArgs(@Vector(3, i255), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
             try testArgs(@Vector(3, i256), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+            try testArgs(@Vector(3, i257), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+            try testArgs(@Vector(3, i511), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
             try testArgs(@Vector(3, i512), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+            try testArgs(@Vector(3, i513), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+            try testArgs(@Vector(3, i1023), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
             try testArgs(@Vector(3, i1024), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+            try testArgs(@Vector(3, i1025), @Vector(3, i1025), .{ -1 << 1024, -1, 0 });
+            try testArgs(@Vector(3, u7), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
             try testArgs(@Vector(3, u8), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
+            try testArgs(@Vector(3, u9), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
+            try testArgs(@Vector(3, u15), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
             try testArgs(@Vector(3, u16), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
+            try testArgs(@Vector(3, u17), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
+            try testArgs(@Vector(3, u31), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
             try testArgs(@Vector(3, u32), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
+            try testArgs(@Vector(3, u33), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
+            try testArgs(@Vector(3, u63), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
             try testArgs(@Vector(3, u64), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
+            try testArgs(@Vector(3, u65), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
+            try testArgs(@Vector(3, u127), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
             try testArgs(@Vector(3, u128), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
+            try testArgs(@Vector(3, u129), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
+            try testArgs(@Vector(3, u255), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
             try testArgs(@Vector(3, u256), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
+            try testArgs(@Vector(3, u257), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
+            try testArgs(@Vector(3, u511), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
             try testArgs(@Vector(3, u512), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
+            try testArgs(@Vector(3, u513), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
+            try testArgs(@Vector(3, u1023), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
             try testArgs(@Vector(3, u1024), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
+            try testArgs(@Vector(3, u1025), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
         }
         fn testIntVectors() !void {
             try testSameSignednessIntVectors();
@@ -10056,6 +11400,7 @@ inline fn truncate(comptime Result: type, comptime Type: type, rhs: Type, compti
 // Decl test for the `truncate` helper declared immediately above this block.
 // Runs the same-signedness integer suites against it: the scalar suite first,
 // then the vector suite.
 test truncate {
     // `cast` is defined elsewhere in this file; presumably it returns the
     // collection of test drivers specialized for the given operation, with
     // `.{}` supplying no extra options -- TODO confirm against its definition.
     const test_truncate = cast(truncate, .{});
     try test_truncate.testSameSignednessInts();
     // Vector counterpart of the scalar suite invoked on the previous line.
+    try test_truncate.testSameSignednessIntVectors();
 }
 
 inline fn floatCast(comptime Result: type, comptime Type: type, rhs: Type, comptime _: Type) Result {