Commit c7433212d1

Jacob Young <jacobly0@users.noreply.github.com>
2025-01-24 20:00:15
x86_64: rewrite scalar and vector int `@min` and `@max`
1 parent 7701cfa
lib/std/zig/Zir.zig
@@ -2128,7 +2128,7 @@ pub const Inst = struct {
         ref_start_index = static_len,
         _,
 
-        pub const static_len = 76;
+        pub const static_len = 92;
 
         pub fn toRef(i: Index) Inst.Ref {
             return @enumFromInt(@intFromEnum(Index.ref_start_index) + @intFromEnum(i));
@@ -2211,6 +2211,22 @@ pub const Inst = struct {
         single_const_pointer_to_comptime_int_type,
         slice_const_u8_type,
         slice_const_u8_sentinel_0_type,
+        vector_16_i8_type,
+        vector_32_i8_type,
+        vector_16_u8_type,
+        vector_32_u8_type,
+        vector_8_i16_type,
+        vector_16_i16_type,
+        vector_8_u16_type,
+        vector_16_u16_type,
+        vector_4_i32_type,
+        vector_8_i32_type,
+        vector_4_u32_type,
+        vector_8_u32_type,
+        vector_2_i64_type,
+        vector_4_i64_type,
+        vector_2_u64_type,
+        vector_4_u64_type,
         vector_4_f16_type,
         vector_8_f16_type,
         vector_4_f32_type,
src/arch/x86_64/CodeGen.zig
@@ -2393,7 +2393,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
 }
 
 fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
-    @setEvalBranchQuota(2_700);
+    @setEvalBranchQuota(3_400);
     const pt = cg.pt;
     const zcu = pt.zcu;
     const ip = &zcu.intern_pool;
@@ -2433,8 +2433,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
             .add_wrap,
             .sub,
             .sub_wrap,
-            .min,
-            .max,
             => |air_tag| try cg.airBinOp(inst, air_tag),
 
             .shr, .shr_exact => try cg.airShlShrBinOp(inst),
@@ -2554,231 +2552,5396 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 var res: [1]Temp = undefined;
                 if (ty_pl.ty.toType().elemType2(zcu).hasRuntimeBitsIgnoreComptime(zcu)) cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .simm32 } },
+                        .{ .src = .{ .to_gpr, .simm32 } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .dst0p, .leaa(.none, .src0, .add_src0_elem_size_times_src1), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{.{ .elem_size_is = 1 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{.{ .elem_size_is = 2 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"2", .src1), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{.{ .elem_size_is = 2 + 1 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"2", .src1), ._, ._ },
+                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{.{ .elem_size_is = 4 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"4", .src1), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{.{ .elem_size_is = 4 + 1 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src1 }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"4", .src1), ._, ._ },
+                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .dst_constraints = .{.{ .elem_size_is = 8 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"8", .src1), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .dst_constraints = .{.{ .elem_size_is = 8 + 1 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src1 }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"8", .src1), ._, ._ },
+                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{.po2_elem_size},
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_mut_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src1 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._l, .sh, .src1p, .sa(.none, .add_log2_src0_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ },
+                    } },
+                }, .{
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, .i_, .mul, .dst0p, .src1p, .sa(.none, .add_src0_elem_size), ._ },
+                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
+                    } },
+                } }) catch |err| switch (err) {
+                    error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
+                        @tagName(air_tag),
+                        cg.typeOf(bin_op.lhs).fmt(pt),
+                        ops[0].tracking(cg),
+                        ops[1].tracking(cg),
+                    }),
+                    else => |e| return e,
+                } else { // hack around Sema OPV bugs
+                    res[0] = ops[0];
+                }
+                try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg);
+            },
+            .ptr_sub => |air_tag| if (use_old) try cg.airPtrArithmetic(inst, air_tag) else {
+                const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
+                const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data;
+                var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
+                try ops[0].toSlicePtr(cg);
+                var res: [1]Temp = undefined;
+                if (ty_pl.ty.toType().elemType2(zcu).hasRuntimeBitsIgnoreComptime(zcu)) cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .simm32 } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .dst0p, .leaa(.none, .src0, .sub_src0_elem_size_times_src1), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{.{ .elem_size_is = 1 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_mut_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src1 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .neg, .src1p, ._, ._, ._ },
+                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{.{ .elem_size_is = 2 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_mut_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src1 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .neg, .src1p, ._, ._, ._ },
+                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"2", .src1), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{.{ .elem_size_is = 2 + 1 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"2", .src1), ._, ._ },
+                        .{ ._, ._, .neg, .dst0p, ._, ._, ._ },
+                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{.{ .elem_size_is = 4 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_mut_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src1 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .neg, .src1p, ._, ._, ._ },
+                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"4", .src1), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{.{ .elem_size_is = 4 + 1 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"4", .src1), ._, ._ },
+                        .{ ._, ._, .neg, .dst0p, ._, ._, ._ },
+                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .dst_constraints = .{.{ .elem_size_is = 8 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_mut_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src1 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .neg, .src1p, ._, ._, ._ },
+                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"8", .src1), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .dst_constraints = .{.{ .elem_size_is = 8 + 1 }},
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"8", .src1), ._, ._ },
+                        .{ ._, ._, .neg, .dst0p, ._, ._, ._ },
+                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
+                    } },
+                }, .{
+                    .dst_constraints = .{.po2_elem_size},
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_mut_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src1 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._l, .sa, .src1p, .sa(.none, .add_log2_src0_elem_size), ._, ._ },
+                        .{ ._, ._, .neg, .src1p, ._, ._, ._ },
+                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ },
+                    } },
+                }, .{
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, .i_, .mul, .dst0p, .src1p, .sa(.none, .sub_src0_elem_size), ._ },
+                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
+                    } },
+                } }) catch |err| switch (err) {
+                    error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
+                        @tagName(air_tag),
+                        cg.typeOf(bin_op.lhs).fmt(pt),
+                        ops[0].tracking(cg),
+                        ops[1].tracking(cg),
+                    }),
+                    else => |e| return e,
+                } else {
+                    // hack around Sema OPV bugs
+                    res[0] = ops[0];
+                }
+                try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg);
+            },
+            .max => |air_tag| if (use_old) try cg.airBinOp(inst, air_tag) else fallback: {
+                const bin_op = air_datas[@intFromEnum(inst)].bin_op;
+                if (cg.typeOf(bin_op.lhs).scalarType(zcu).isRuntimeFloat()) break :fallback try cg.airBinOp(inst, air_tag);
+                var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
+                var res: [1]Temp = undefined;
+                cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, comptime &.{ .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                        .{ ._, ._l, .cmov, .dst0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                        .{ ._, ._nl, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .dst0b, .src1b, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                        .{ ._, ._b, .cmov, .dst0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                        .{ ._, ._nb, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .dst0b, .src1b, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                        .{ ._, ._l, .cmov, .dst0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                        .{ ._, ._nl, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .dst0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                        .{ ._, ._b, .cmov, .dst0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                        .{ ._, ._nb, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .dst0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .signed_int = .dword }, .{ .signed_int = .dword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                        .{ ._, ._l, .cmov, .dst0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .dword }, .{ .signed_int = .dword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                        .{ ._, ._nl, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .dst0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                        .{ ._, ._b, .cmov, .dst0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                        .{ ._, ._nb, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .dst0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{ .{ .signed_int = .qword }, .{ .signed_int = .qword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                        .{ ._, ._l, .cmov, .dst0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .signed_int = .qword }, .{ .signed_int = .qword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                        .{ ._, ._nl, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .dst0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .qword }, .{ .unsigned_int = .qword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                        .{ ._, ._b, .cmov, .dst0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .qword }, .{ .unsigned_int = .qword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                        .{ ._, ._nb, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .dst0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{ .any_signed_int, .any_signed_int },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rcx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sia(1, .src0, .sub_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsiad(.src0q, .@"8", .tmp0, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memsiad(.src1q, .@"8", .tmp0, .add_size, -8), ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1q, .memad(.src0q, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memad(.src1q, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+                        .{ ._, ._l, .cmov, .tmp0p, .tmp1p, ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sa(.src0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_signed_int, .any_signed_int },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rcx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sia(1, .src0, .sub_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsiad(.src0q, .@"8", .tmp0, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memsiad(.src1q, .@"8", .tmp0, .add_size, -8), ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1q, .memad(.src0q, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memad(.src1q, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._nl, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src1), ._, ._ },
+                        .{ .@"0:", ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sa(.src0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{ .any_unsigned_int, .any_unsigned_int },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rcx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memsia(.src1q, .@"8", .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+                        .{ ._, ._b, .cmov, .tmp0p, .tmp1p, ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sa(.src0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_unsigned_int, .any_unsigned_int },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rcx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memsia(.src1q, .@"8", .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._nb, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src1), ._, ._ },
+                        .{ .@"0:", ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sa(.src0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_b, .maxs, .dst0x, .src0x, .src1x, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_b, .maxs, .dst0x, .src1x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._dqa, .mov, .dst0x, .src0x, ._, ._ },
+                        .{ ._, .p_b, .cmpgt, .dst0x, .src1x, ._, ._ },
+                        .{ ._, .p_, .@"and", .src0x, .dst0x, ._, ._ },
+                        .{ ._, .p_, .andn, .dst0x, .src1x, ._, ._ },
+                        .{ ._, .p_, .@"or", .dst0x, .src0x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .yword, .is = .byte } },
+                        .{ .scalar_signed_int = .{ .of = .yword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_b, .maxs, .dst0y, .src0y, .src1y, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .byte } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_32_i8, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_b, .maxs, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_b, .maxs, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_b, .maxs, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp2x, .tmp1x, ._, ._ },
+                        .{ ._, .p_b, .cmpgt, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_, .@"and", .tmp2x, .tmp1x, ._, ._ },
+                        .{ ._, .p_, .andn, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, .slow_incdec, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .movsx, .tmp2d, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1b, .tmp2b, ._, ._ },
+                        .{ ._, ._l, .cmov, .tmp1d, .tmp2d, ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_size), .tmp1b, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .movsx, .tmp2d, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1b, .tmp2b, ._, ._ },
+                        .{ ._, ._l, .cmov, .tmp1d, .tmp2d, ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_size), .tmp1b, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nl, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_size), .tmp1b, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nl, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_size), .tmp1b, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse, .mmx, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .qword, .is = .byte } },
+                        .{ .scalar_unsigned_int = .{ .of = .qword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_mmx, .mem } },
+                        .{ .src = .{ .mem, .to_mut_mmx }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_mmx, .to_mmx } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_b, .maxu, .dst0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_b, .maxu, .dst0x, .src0x, .src1x, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_b, .maxu, .dst0x, .src1x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .byte } },
+                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_b, .maxu, .dst0y, .src0y, .src1y, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .byte } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_32_u8, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_b, .maxu, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_b, .maxu, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_b, .maxu, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, .slow_incdec, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .movzx, .tmp2d, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1b, .tmp2b, ._, ._ },
+                        .{ ._, ._b, .cmov, .tmp1d, .tmp2d, ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_size), .tmp1b, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .movzx, .tmp2d, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1b, .tmp2b, ._, ._ },
+                        .{ ._, ._b, .cmov, .tmp1d, .tmp2d, ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_size), .tmp1b, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nb, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_size), .tmp1b, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nb, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_size), .tmp1b, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse, .mmx, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .qword, .is = .word } },
+                        .{ .scalar_signed_int = .{ .of = .qword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_mmx, .mem } },
+                        .{ .src = .{ .mem, .to_mut_mmx }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_mmx, .to_mmx } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_w, .maxs, .dst0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .word } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_w, .maxs, .dst0x, .src0x, .src1x, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .word } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_w, .maxs, .dst0x, .src1x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .yword, .is = .word } },
+                        .{ .scalar_signed_int = .{ .of = .yword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_w, .maxs, .dst0y, .src0y, .src1y, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .word } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_i16, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_w, .maxs, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_i16, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_w, .maxs, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_i16, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_w, .maxs, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._l, .cmov, .tmp1w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nl, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0w, .tmp0, .add_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_w, .maxu, .dst0x, .src0x, .src1x, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_w, .maxu, .dst0x, .src1x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_w, .subus, .dst0x, .src1x, ._, ._ },
+                        .{ ._, .p_w, .add, .dst0x, .src1x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .word } },
+                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_w, .maxu, .dst0y, .src0y, .src1y, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .word } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_u16, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_w, .maxu, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_w, .maxu, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_w, .maxu, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_w, .subus, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_w, .add, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._b, .cmov, .tmp1w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nb, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0w, .tmp0, .add_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_d, .maxs, .dst0x, .src0x, .src1x, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_d, .maxs, .dst0x, .src1x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._dqa, .mov, .dst0x, .src0x, ._, ._ },
+                        .{ ._, .p_d, .cmpgt, .dst0x, .src1x, ._, ._ },
+                        .{ ._, .p_, .@"and", .src0x, .dst0x, ._, ._ },
+                        .{ ._, .p_, .andn, .dst0x, .src1x, ._, ._ },
+                        .{ ._, .p_, .@"or", .dst0x, .src0x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .yword, .is = .dword } },
+                        .{ .scalar_signed_int = .{ .of = .yword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_d, .maxs, .dst0y, .src0y, .src1y, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .dword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_8_i32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_d, .maxs, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_i32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_d, .maxs, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_i32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_d, .maxs, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_i32, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_i32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp2x, .tmp1x, ._, ._ },
+                        .{ ._, .p_d, .cmpgt, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_, .@"and", .tmp2x, .tmp1x, ._, ._ },
+                        .{ ._, .p_, .andn, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._l, .cmov, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nl, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0d, .tmp0, .add_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_d, .maxu, .dst0x, .src0x, .src1x, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_d, .maxu, .dst0x, .src1x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .xword } } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, ._dqa, .mov, .dst0x, .lea(.xword, .tmp0), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp2x, .dst0x, ._, ._ },
+                        .{ ._, .p_, .xor, .dst0x, .src0x, ._, ._ },
+                        .{ ._, .p_, .xor, .tmp2x, .src1x, ._, ._ },
+                        .{ ._, .p_d, .cmpgt, .dst0x, .tmp2x, ._, ._ },
+                        .{ ._, .p_, .@"and", .src0x, .dst0x, ._, ._ },
+                        .{ ._, .p_, .andn, .dst0x, .src1x, ._, ._ },
+                        .{ ._, .p_, .@"or", .dst0x, .src0x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .dword } },
+                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_d, .maxu, .dst0y, .src0y, .src1y, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .dword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_8_u32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_d, .maxu, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_d, .maxu, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_d, .maxu, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .xword } } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp2x, .lea(.xword, .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp5x, .tmp3x, ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp6x, .tmp4x, ._, ._ },
+                        .{ ._, .p_, .xor, .tmp5x, .tmp2x, ._, ._ },
+                        .{ ._, .p_, .xor, .tmp6x, .tmp2x, ._, ._ },
+                        .{ ._, .p_d, .cmpgt, .tmp5x, .tmp6x, ._, ._ },
+                        .{ ._, .p_, .@"and", .tmp3x, .tmp5x, ._, ._ },
+                        .{ ._, .p_, .andn, .tmp5x, .tmp4x, ._, ._ },
+                        .{ ._, .p_, .@"or", .tmp3x, .tmp5x, ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp3x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._b, .cmov, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nb, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0d, .tmp0, .add_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_q, .cmpgt, .dst0x, .src1x, .src0x, ._ },
+                        .{ ._, .vp_b, .blendv, .dst0x, .src0x, .src1x, .dst0x },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .vector_2_i64, .kind = .{ .reg = .xmm0 } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._dqa, .mov, .tmp0x, .src1x, ._, ._ },
+                        .{ ._, .p_q, .cmpgt, .tmp0x, .src0x, ._, ._ },
+                        .{ ._, .p_b, .blendv, .dst0x, .src1x, .tmp0x, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .yword, .is = .qword } },
+                        .{ .scalar_signed_int = .{ .of = .yword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_q, .cmpgt, .dst0y, .src1y, .src0y, ._ },
+                        .{ ._, .vp_b, .blendv, .dst0y, .src0y, .src1y, .dst0y },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .qword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_i64, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_q, .cmpgt, .tmp3y, .tmp2y, .tmp1y, ._ },
+                        .{ ._, .vp_b, .blendv, .tmp1y, .tmp1y, .tmp2y, .tmp3y },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_q, .cmpgt, .tmp3x, .tmp2x, .tmp1x, ._ },
+                        .{ ._, .vp_b, .blendv, .tmp1x, .tmp1x, .tmp2x, .tmp3x },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_i64, .kind = .{ .reg = .xmm0 } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp3x, .tmp2x, ._, ._ },
+                        .{ ._, .p_q, .cmpgt, .tmp3x, .tmp1x, ._, ._ },
+                        .{ ._, .p_b, .blendv, .tmp1x, .tmp2x, .tmp3x, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i64, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._l, .cmov, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i64, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nl, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .v_, .movddup, .tmp2x, .lea(.qword, .tmp0), ._, ._ },
+                        .{ ._, .vp_, .xor, .dst0x, .tmp2x, .src0x, ._ },
+                        .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .src1x, ._ },
+                        .{ ._, .vp_q, .cmpgt, .dst0x, .tmp2x, .dst0x, ._ },
+                        .{ ._, .vp_b, .blendv, .dst0x, .src0x, .src1x, .dst0x },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .reg = .xmm0 } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, ._, .movddup, .tmp2x, .lea(.qword, .tmp0), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp3x, .tmp2x, ._, ._ },
+                        .{ ._, .p_, .xor, .tmp2x, .src0x, ._, ._ },
+                        .{ ._, .p_, .xor, .tmp3x, .src1x, ._, ._ },
+                        .{ ._, .p_q, .cmpgt, .tmp3x, .tmp2x, ._, ._ },
+                        .{ ._, .p_b, .blendv, .dst0x, .src1x, .tmp3x, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .qword } },
+                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } },
+                        .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .vp_q, .broadcast, .tmp2y, .lea(.qword, .tmp0), ._, ._ },
+                        .{ ._, .vp_, .xor, .dst0y, .tmp2y, .src0y, ._ },
+                        .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .src1y, ._ },
+                        .{ ._, .vp_q, .cmpgt, .dst0y, .tmp2y, .dst0y, ._ },
+                        .{ ._, .vp_b, .blendv, .dst0y, .src0y, .src1y, .dst0y },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .qword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } },
+                        .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .vp_q, .broadcast, .tmp2y, .lea(.qword, .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp4y, .memia(.src1y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_, .xor, .tmp5y, .tmp3y, .tmp2y, ._ },
+                        .{ ._, .vp_, .xor, .tmp6y, .tmp4y, .tmp2y, ._ },
+                        .{ ._, .vp_q, .cmpgt, .tmp5y, .tmp6y, .tmp5y, ._ },
+                        .{ ._, .vp_b, .blendv, .tmp3y, .tmp3y, .tmp4y, .tmp5y },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp3y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .v_, .movddup, .tmp2x, .lea(.qword, .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_, .xor, .tmp5x, .tmp3x, .tmp2x, ._ },
+                        .{ ._, .vp_, .xor, .tmp6x, .tmp4x, .tmp2x, ._ },
+                        .{ ._, .vp_q, .cmpgt, .tmp5x, .tmp6x, .tmp5x, ._ },
+                        .{ ._, .vp_b, .blendv, .tmp3x, .tmp3x, .tmp4x, .tmp5x },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp3x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .reg = .xmm0 } },
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, ._, .movddup, .tmp2x, .lea(.qword, .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp5x, .tmp2x, ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp6x, .tmp2x, ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_, .xor, .tmp5x, .tmp3x, ._, ._ },
+                        .{ ._, .p_, .xor, .tmp6x, .tmp4x, ._, ._ },
+                        .{ ._, .p_q, .cmpgt, .tmp6x, .tmp5x, ._, ._ },
+                        .{ ._, .p_b, .blendv, .tmp3x, .tmp4x, .tmp6x, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp3x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._b, .cmov, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nb, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{ .any_scalar_signed_int, .any_scalar_signed_int },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .isize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rcx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .sia(-1, .none, .add_src0_elem_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .lead(.none, .tmp0, 8), ._, ._ },
+                        .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memiad(.src0, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memiad(.src1, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._l, .cmov, .tmp1p, .tmp2p, ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memiad(.dst0, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._, .mov, .tmp3d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_scalar_signed_int, .any_scalar_signed_int },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .isize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rcx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .sia(-1, .none, .add_src0_elem_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .lead(.none, .tmp0, 8), ._, ._ },
+                        .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memiad(.src0, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._nl, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memiad(.src1, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ .@"1:", ._, .lea, .tmp2p, .memiad(.dst0, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._, .mov, .tmp3d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any_scalar_unsigned_int },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .isize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rcx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .lead(.none, .tmp0, 8), ._, ._ },
+                        .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memia(.src1, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._b, .cmov, .tmp1p, .tmp2p, ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memia(.dst0, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp3d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .@"test", .tmp0p, .tmp0p, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any_scalar_unsigned_int },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .isize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rcx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .lead(.none, .tmp0, 8), ._, ._ },
+                        .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._nb, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memia(.src1, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ .@"1:", ._, .lea, .tmp2p, .memia(.dst0, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp3d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .@"test", .tmp0p, .tmp0p, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                } }) catch |err| switch (err) {
+                    error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
+                        @tagName(air_tag),
+                        cg.typeOf(bin_op.lhs).fmt(pt),
+                        ops[0].tracking(cg),
+                        ops[1].tracking(cg),
+                    }),
+                    else => |e| return e,
+                };
+                try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg);
+            },
+            .min => |air_tag| if (use_old) try cg.airBinOp(inst, air_tag) else fallback: {
+                const bin_op = air_datas[@intFromEnum(inst)].bin_op;
+                if (cg.typeOf(bin_op.lhs).scalarType(zcu).isRuntimeFloat()) break :fallback try cg.airBinOp(inst, air_tag);
+                var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
+                var res: [1]Temp = undefined;
+                cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, comptime &.{ .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                        .{ ._, ._ge, .cmov, .dst0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                        .{ ._, ._nge, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .dst0b, .src1b, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                        .{ ._, ._ae, .cmov, .dst0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ },
+                        .{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .dst0b, .src1b, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                        .{ ._, ._ge, .cmov, .dst0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                        .{ ._, ._nge, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .dst0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                        .{ ._, ._ae, .cmov, .dst0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ },
+                        .{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .dst0w, .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .signed_int = .dword }, .{ .signed_int = .dword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                        .{ ._, ._ge, .cmov, .dst0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .signed_int = .dword }, .{ .signed_int = .dword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                        .{ ._, ._nge, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .dst0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                        .{ ._, ._ae, .cmov, .dst0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ },
+                        .{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .dst0d, .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{ .{ .signed_int = .qword }, .{ .signed_int = .qword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                        .{ ._, ._ge, .cmov, .dst0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .signed_int = .qword }, .{ .signed_int = .qword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                        .{ ._, ._nge, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .dst0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .qword }, .{ .unsigned_int = .qword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                        .{ ._, ._ae, .cmov, .dst0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .{ .unsigned_int = .qword }, .{ .unsigned_int = .qword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_gpr, .mem } },
+                        .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_gpr, .to_gpr } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ },
+                        .{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .dst0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{ .any_signed_int, .any_signed_int },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rcx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sia(1, .src0, .sub_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsiad(.src0q, .@"8", .tmp0, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memsiad(.src1q, .@"8", .tmp0, .add_size, -8), ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1q, .memad(.src0q, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memad(.src1q, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+                        .{ ._, ._ge, .cmov, .tmp0p, .tmp1p, ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sa(.src0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_signed_int, .any_signed_int },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rcx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sia(1, .src0, .sub_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsiad(.src0q, .@"8", .tmp0, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memsiad(.src1q, .@"8", .tmp0, .add_size, -8), ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1q, .memad(.src0q, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memad(.src1q, .add_size, -8), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._nge, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src1), ._, ._ },
+                        .{ .@"0:", ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sa(.src0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{ .any_unsigned_int, .any_unsigned_int },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rcx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memsia(.src1q, .@"8", .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
+                        .{ ._, ._ae, .cmov, .tmp0p, .tmp1p, ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sa(.src0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_unsigned_int, .any_unsigned_int },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rcx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp1q, .memsia(.src1q, .@"8", .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ },
+                        .{ ._, ._nae, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.src1), ._, ._ },
+                        .{ .@"0:", ._, .lea, .tmp1p, .mem(.dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .sa(.src0, .add_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_b, .mins, .dst0x, .src0x, .src1x, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_b, .mins, .dst0x, .src1x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._dqa, .mov, .dst0x, .src1x, ._, ._ },
+                        .{ ._, .p_b, .cmpgt, .dst0x, .src0x, ._, ._ },
+                        .{ ._, .p_, .@"and", .src0x, .dst0x, ._, ._ },
+                        .{ ._, .p_, .andn, .dst0x, .src1x, ._, ._ },
+                        .{ ._, .p_, .@"or", .dst0x, .src0x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .yword, .is = .byte } },
+                        .{ .scalar_signed_int = .{ .of = .yword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_b, .mins, .dst0y, .src0y, .src1y, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .byte } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_32_i8, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_b, .mins, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_b, .mins, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_b, .mins, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_16_i8, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_b, .cmpgt, .tmp1x, .tmp2x, ._, ._ },
+                        .{ ._, .p_, .@"and", .tmp2x, .tmp1x, ._, ._ },
+                        .{ ._, .p_, .andn, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, .slow_incdec, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .movsx, .tmp2d, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1b, .tmp2b, ._, ._ },
+                        .{ ._, ._ge, .cmov, .tmp1d, .tmp2d, ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_size), .tmp1b, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .movsx, .tmp2d, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1b, .tmp2b, ._, ._ },
+                        .{ ._, ._ge, .cmov, .tmp1d, .tmp2d, ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_size), .tmp1b, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nge, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_size), .tmp1b, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .byte, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nge, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_size), .tmp1b, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse, .mmx, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .qword, .is = .byte } },
+                        .{ .scalar_unsigned_int = .{ .of = .qword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_mmx, .mem } },
+                        .{ .src = .{ .mem, .to_mut_mmx }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_mmx, .to_mmx } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_b, .minu, .dst0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_b, .minu, .dst0x, .src0x, .src1x, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_b, .minu, .dst0x, .src1x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .byte } },
+                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_b, .minu, .dst0y, .src0y, .src1y, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .byte } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_32_u8, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_b, .minu, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_b, .minu, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_b, .minu, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, .slow_incdec, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .movzx, .tmp2d, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1b, .tmp2b, ._, ._ },
+                        .{ ._, ._ae, .cmov, .tmp1d, .tmp2d, ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_size), .tmp1b, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .movzx, .tmp2d, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1b, .tmp2b, ._, ._ },
+                        .{ ._, ._ae, .cmov, .tmp1d, .tmp2d, ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_size), .tmp1b, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .slow_incdec, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nae, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_size), .tmp1b, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .byte, .is = .byte } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nae, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1b, .memia(.src1b, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_size), .tmp1b, ._, ._ },
+                        .{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse, .mmx, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .qword, .is = .word } },
+                        .{ .scalar_signed_int = .{ .of = .qword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_mmx, .mem } },
+                        .{ .src = .{ .mem, .to_mut_mmx }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_mmx, .to_mmx } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_w, .mins, .dst0q, .src1q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .word } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_w, .mins, .dst0x, .src0x, .src1x, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .word } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_w, .mins, .dst0x, .src1x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .yword, .is = .word } },
+                        .{ .scalar_signed_int = .{ .of = .yword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_w, .mins, .dst0y, .src0y, .src1y, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .word } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_i16, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_w, .mins, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_i16, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_w, .mins, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_i16, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_w, .mins, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._ge, .cmov, .tmp1w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .word, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movsx, .tmp1d, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nge, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0w, .tmp0, .add_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_w, .minu, .dst0x, .src0x, .src1x, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_w, .minu, .dst0x, .src1x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._dqa, .mov, .dst0x, .src0x, ._, ._ },
+                        .{ ._, .p_w, .subus, .src0x, .src1x, ._, ._ },
+                        .{ ._, .p_w, .sub, .dst0x, .src0x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .word } },
+                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_w, .minu, .dst0y, .src0y, .src1y, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .word } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_16_u16, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_w, .minu, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_w, .minu, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_w, .minu, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp2x, .tmp1x, ._, ._ },
+                        .{ ._, .p_w, .subus, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_w, .sub, .tmp2x, .tmp1x, ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp2x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._ae, .cmov, .tmp1w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .word, .is = .word } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nae, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1w, .memia(.src1w, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0w, .tmp0, .add_size), .tmp1w, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_d, .mins, .dst0x, .src0x, .src1x, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_d, .mins, .dst0x, .src1x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._dqa, .mov, .dst0x, .src1x, ._, ._ },
+                        .{ ._, .p_d, .cmpgt, .dst0x, .src0x, ._, ._ },
+                        .{ ._, .p_, .@"and", .src0x, .dst0x, ._, ._ },
+                        .{ ._, .p_, .andn, .dst0x, .src1x, ._, ._ },
+                        .{ ._, .p_, .@"or", .dst0x, .src0x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .yword, .is = .dword } },
+                        .{ .scalar_signed_int = .{ .of = .yword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_d, .mins, .dst0y, .src0y, .src1y, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .dword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_8_i32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_d, .mins, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_i32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_d, .mins, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_i32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_d, .mins, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_i32, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_i32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_d, .cmpgt, .tmp1x, .tmp2x, ._, ._ },
+                        .{ ._, .p_, .@"and", .tmp2x, .tmp1x, ._, ._ },
+                        .{ ._, .p_, .andn, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._ge, .cmov, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nge, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0d, .tmp0, .add_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_d, .minu, .dst0x, .src0x, .src1x, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, .p_d, .minu, .dst0x, .src1x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .xword } } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, ._dqa, .mov, .dst0x, .lea(.xword, .tmp0), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp2x, .dst0x, ._, ._ },
+                        .{ ._, .p_, .xor, .dst0x, .src1x, ._, ._ },
+                        .{ ._, .p_, .xor, .tmp2x, .src0x, ._, ._ },
+                        .{ ._, .p_d, .cmpgt, .dst0x, .tmp2x, ._, ._ },
+                        .{ ._, .p_, .@"and", .src0x, .dst0x, ._, ._ },
+                        .{ ._, .p_, .andn, .dst0x, .src1x, ._, ._ },
+                        .{ ._, .p_, .@"or", .dst0x, .src0x, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .dword } },
+                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, .vp_d, .minu, .dst0y, .src0y, .src1y, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .dword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_8_u32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_d, .minu, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_d, .minu, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_1, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
                     },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .dst0p, .leaa(.none, .src0, .add_src0_elem_size_times_src1), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_d, .minu, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .dst_constraints = .{.{ .elem_size_is = 1 }},
+                    .required_features = .{ .sse2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .dword } },
+                    },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_gpr } },
+                        .{ .src = .{ .to_mem, .to_mem } },
                     },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .xword } } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp2x, .lea(.xword, .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp5x, .tmp3x, ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp6x, .tmp4x, ._, ._ },
+                        .{ ._, .p_, .xor, .tmp5x, .tmp2x, ._, ._ },
+                        .{ ._, .p_, .xor, .tmp6x, .tmp2x, ._, ._ },
+                        .{ ._, .p_d, .cmpgt, .tmp5x, .tmp6x, ._, ._ },
+                        .{ ._, .p_, .@"and", .tmp4x, .tmp5x, ._, ._ },
+                        .{ ._, .p_, .andn, .tmp5x, .tmp3x, ._, ._ },
+                        .{ ._, .p_, .@"or", .tmp4x, .tmp5x, ._, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp4x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .dst_constraints = .{.{ .elem_size_is = 2 }},
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } },
+                    },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_gpr } },
+                        .{ .src = .{ .to_mem, .to_mem } },
                     },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"2", .src1), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._ae, .cmov, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .dst_constraints = .{.{ .elem_size_is = 2 + 1 }},
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } },
+                    },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_gpr } },
+                        .{ .src = .{ .to_mem, .to_mem } },
                     },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"2", .src1), ._, ._ },
-                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .memia(.src0d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nae, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .memia(.src1d, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0d, .tmp0, .add_size), .tmp1d, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .dst_constraints = .{.{ .elem_size_is = 4 }},
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                    },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_gpr } },
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
                     },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .dst_temps = .{.{ .rc = .sse }},
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"4", .src1), ._, ._ },
+                        .{ ._, .vp_q, .cmpgt, .dst0x, .src0x, .src1x, ._ },
+                        .{ ._, .vp_b, .blendv, .dst0x, .src0x, .src1x, .dst0x },
                     } },
                 }, .{
-                    .dst_constraints = .{.{ .elem_size_is = 4 + 1 }},
+                    .required_features = .{ .sse4_2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                    },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_gpr } },
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
                     },
-                    .dst_temps = .{.{ .ref = .src1 }},
+                    .extra_temps = .{
+                        .{ .type = .vector_2_i64, .kind = .{ .reg = .xmm0 } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"4", .src1), ._, ._ },
-                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp0x, .src0x, ._, ._ },
+                        .{ ._, .p_q, .cmpgt, .tmp0x, .src1x, ._, ._ },
+                        .{ ._, .p_b, .blendv, .dst0x, .src1x, .tmp0x, ._ },
                     } },
                 }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .dst_constraints = .{.{ .elem_size_is = 8 }},
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_signed_int = .{ .of = .yword, .is = .qword } },
+                        .{ .scalar_signed_int = .{ .of = .yword, .is = .qword } },
+                    },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_gpr } },
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
                     },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .dst_temps = .{.{ .rc = .sse }},
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"8", .src1), ._, ._ },
+                        .{ ._, .vp_q, .cmpgt, .dst0y, .src0y, .src1y, ._ },
+                        .{ ._, .vp_b, .blendv, .dst0y, .src0y, .src1y, .dst0y },
                     } },
                 }, .{
-                    .required_features = .{ .@"64bit", null, null, null },
-                    .dst_constraints = .{.{ .elem_size_is = 8 + 1 }},
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .qword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .yword, .is = .qword } },
+                    },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_gpr } },
+                        .{ .src = .{ .to_mem, .to_mem } },
                     },
-                    .dst_temps = .{.{ .ref = .src1 }},
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_4_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_i64, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"8", .src1), ._, ._ },
-                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_q, .cmpgt, .tmp3y, .tmp1y, .tmp2y, ._ },
+                        .{ ._, .vp_b, .blendv, .tmp1y, .tmp1y, .tmp2y, .tmp3y },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .dst_constraints = .{.po2_elem_size},
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                    },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_mut_gpr } },
+                        .{ .src = .{ .to_mem, .to_mem } },
                     },
-                    .dst_temps = .{.{ .ref = .src1 }},
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._l, .sh, .src1p, .sa(.none, .add_log2_src0_elem_size), ._, ._ },
-                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_q, .cmpgt, .tmp3x, .tmp1x, .tmp2x, ._ },
+                        .{ ._, .vp_b, .blendv, .tmp1x, .tmp1x, .tmp2x, .tmp3x },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
+                    .required_features = .{ .sse4_2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .qword } },
+                    },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_gpr } },
+                        .{ .src = .{ .to_mem, .to_mem } },
                     },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_i64, .kind = .{ .reg = .xmm0 } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, .i_, .mul, .dst0p, .src1p, .sa(.none, .add_src0_elem_size), ._ },
-                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp3x, .tmp1x, ._, ._ },
+                        .{ ._, .p_q, .cmpgt, .tmp3x, .tmp2x, ._, ._ },
+                        .{ ._, .p_b, .blendv, .tmp1x, .tmp2x, .tmp3x, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
-                } }) catch |err| switch (err) {
-                    error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
-                        @tagName(air_tag),
-                        cg.typeOf(bin_op.lhs).fmt(pt),
-                        ops[0].tracking(cg),
-                        ops[1].tracking(cg),
-                    }),
-                    else => |e| return e,
-                } else { // hack around Sema OPV bugs
-                    res[0] = ops[0];
-                }
-                try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg);
-            },
-            .ptr_sub => |air_tag| if (use_old) try cg.airPtrArithmetic(inst, air_tag) else {
-                const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
-                const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data;
-                var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
-                try ops[0].toSlicePtr(cg);
-                var res: [1]Temp = undefined;
-                if (ty_pl.ty.toType().elemType2(zcu).hasRuntimeBitsIgnoreComptime(zcu)) cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{
+                }, .{
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } },
+                    },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .simm32 } },
+                        .{ .src = .{ .to_mem, .to_mem } },
                     },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i64, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .dst0p, .leaa(.none, .src0, .sub_src0_elem_size_times_src1), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._ge, .cmov, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .dst_constraints = .{.{ .elem_size_is = 1 }},
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } },
+                        .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } },
+                    },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_mut_gpr } },
+                        .{ .src = .{ .to_mem, .to_mem } },
                     },
-                    .dst_temps = .{.{ .ref = .src1 }},
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .i64, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .neg, .src1p, ._, ._, ._ },
-                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nge, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .dst_constraints = .{.{ .elem_size_is = 2 }},
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                    },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_mut_gpr } },
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
                     },
-                    .dst_temps = .{.{ .ref = .src1 }},
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .v_, .movddup, .tmp2x, .lea(.qword, .tmp0), ._, ._ },
+                        .{ ._, .vp_, .xor, .dst0x, .tmp2x, .src0x, ._ },
+                        .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .src1x, ._ },
+                        .{ ._, .vp_q, .cmpgt, .dst0x, .dst0x, .tmp2x, ._ },
+                        .{ ._, .vp_b, .blendv, .dst0x, .src0x, .src1x, .dst0x },
+                    } },
+                }, .{
+                    .required_features = .{ .sse4_2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                        .{ .scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mut_sse, .mem } },
+                        .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_mut_sse, .to_sse } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } },
+                        .{ .type = .vector_2_u64, .kind = .{ .reg = .xmm0 } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .ref = .src0 }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, ._, .movddup, .tmp2x, .lea(.qword, .tmp0), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp3x, .tmp2x, ._, ._ },
+                        .{ ._, .p_, .xor, .tmp2x, .src0x, ._, ._ },
+                        .{ ._, .p_, .xor, .tmp3x, .src1x, ._, ._ },
+                        .{ ._, .p_q, .cmpgt, .tmp2x, .tmp3x, ._, ._ },
+                        .{ ._, .p_b, .blendv, .dst0x, .src1x, .tmp2x, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .qword } },
+                        .{ .scalar_unsigned_int = .{ .of = .yword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_sse, .mem } },
+                        .{ .src = .{ .mem, .to_sse }, .commute = .{ 0, 1 } },
+                        .{ .src = .{ .to_sse, .to_sse } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .usize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } },
+                        .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.{ .rc = .sse }},
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .vp_q, .broadcast, .tmp2y, .lea(.qword, .tmp0), ._, ._ },
+                        .{ ._, .vp_, .xor, .dst0y, .tmp2y, .src0y, ._ },
+                        .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .src1y, ._ },
+                        .{ ._, .vp_q, .cmpgt, .dst0y, .dst0y, .tmp2y, ._ },
+                        .{ ._, .vp_b, .blendv, .dst0y, .src0y, .src1y, .dst0y },
+                    } },
+                }, .{
+                    .required_features = .{ .avx2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .qword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .yword, .is = .qword } },
+                    },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } },
+                        .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .neg, .src1p, ._, ._, ._ },
-                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"2", .src1), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .vp_q, .broadcast, .tmp2y, .lea(.qword, .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp4y, .memia(.src1y, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_, .xor, .tmp5y, .tmp3y, .tmp2y, ._ },
+                        .{ ._, .vp_, .xor, .tmp6y, .tmp4y, .tmp2y, ._ },
+                        .{ ._, .vp_q, .cmpgt, .tmp5y, .tmp5y, .tmp6y, ._ },
+                        .{ ._, .vp_b, .blendv, .tmp3y, .tmp3y, .tmp4y, .tmp5y },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp3y, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .dst_constraints = .{.{ .elem_size_is = 2 + 1 }},
+                    .required_features = .{ .avx, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                    },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_gpr } },
+                        .{ .src = .{ .to_mem, .to_mem } },
                     },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"2", .src1), ._, ._ },
-                        .{ ._, ._, .neg, .dst0p, ._, ._, ._ },
-                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, .v_, .movddup, .tmp2x, .lea(.qword, .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", .v_dqa, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .v_dqa, .mov, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .vp_, .xor, .tmp5x, .tmp3x, .tmp2x, ._ },
+                        .{ ._, .vp_, .xor, .tmp6x, .tmp4x, .tmp2x, ._ },
+                        .{ ._, .vp_q, .cmpgt, .tmp5x, .tmp5x, .tmp6x, ._ },
+                        .{ ._, .vp_b, .blendv, .tmp3x, .tmp3x, .tmp4x, .tmp5x },
+                        .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp3x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .dst_constraints = .{.{ .elem_size_is = 4 }},
+                    .required_features = .{ .sse4_2, null, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .qword } },
+                    },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_mut_gpr } },
+                        .{ .src = .{ .to_mem, .to_mem } },
                     },
-                    .dst_temps = .{.{ .ref = .src1 }},
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .{ .type = .vector_2_u64, .kind = .{ .reg = .xmm0 } },
+                        .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } },
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .neg, .src1p, ._, ._, ._ },
-                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"4", .src1), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
+                        .{ ._, ._, .movddup, .tmp2x, .lea(.qword, .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._dqa, .mov, .tmp5x, .tmp2x, ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp6x, .tmp2x, ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._dqa, .mov, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ },
+                        .{ ._, .p_, .xor, .tmp5x, .tmp3x, ._, ._ },
+                        .{ ._, .p_, .xor, .tmp6x, .tmp4x, ._, ._ },
+                        .{ ._, .p_q, .cmpgt, .tmp5x, .tmp6x, ._, ._ },
+                        .{ ._, .p_b, .blendv, .tmp3x, .tmp4x, .tmp5x, ._ },
+                        .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp3x, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .dst_constraints = .{.{ .elem_size_is = 4 + 1 }},
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } },
+                    },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_gpr } },
+                        .{ .src = .{ .to_mem, .to_mem } },
                     },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"4", .src1), ._, ._ },
-                        .{ ._, ._, .neg, .dst0p, ._, ._, ._ },
-                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._ae, .cmov, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
                     .required_features = .{ .@"64bit", null, null, null },
-                    .dst_constraints = .{.{ .elem_size_is = 8 }},
+                    .src_constraints = .{
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } },
+                        .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } },
+                    },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_mut_gpr } },
+                        .{ .src = .{ .to_mem, .to_mem } },
                     },
-                    .dst_temps = .{.{ .ref = .src1 }},
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .neg, .src1p, ._, ._, ._ },
-                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"8", .src1), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .cmp, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._nae, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ .@"1:", ._, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{ .any_scalar_signed_int, .any_scalar_signed_int },
+                    .patterns = &.{
+                        .{ .src = .{ .to_mem, .to_mem } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .isize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rcx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .sia(-1, .none, .add_src0_elem_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .lead(.none, .tmp0, 8), ._, ._ },
+                        .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memiad(.src0, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memiad(.src1, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._ge, .cmov, .tmp1p, .tmp2p, ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memiad(.dst0, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._, .mov, .tmp3d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
                     .required_features = .{ .@"64bit", null, null, null },
-                    .dst_constraints = .{.{ .elem_size_is = 8 + 1 }},
+                    .src_constraints = .{ .any_scalar_signed_int, .any_scalar_signed_int },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_gpr } },
+                        .{ .src = .{ .to_mem, .to_mem } },
                     },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .isize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rcx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"8", .src1), ._, ._ },
-                        .{ ._, ._, .neg, .dst0p, ._, ._, ._ },
-                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .sia(-1, .none, .add_src0_elem_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .lead(.none, .tmp0, 8), ._, ._ },
+                        .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+                        .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memiad(.src0, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._nge, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memiad(.src1, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ .@"1:", ._, .lea, .tmp2p, .memiad(.dst0, .tmp0, .add_size_sub_elem_size, 8), ._, ._ },
+                        .{ ._, ._, .mov, .tmp3d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ },
+                        .{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
-                    .dst_constraints = .{.po2_elem_size},
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any_scalar_unsigned_int },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_mut_gpr } },
+                        .{ .src = .{ .to_mem, .to_mem } },
                     },
-                    .dst_temps = .{.{ .ref = .src1 }},
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .isize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rcx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._l, .sa, .src1p, .sa(.none, .add_log2_src0_elem_size), ._, ._ },
-                        .{ ._, ._, .neg, .src1p, ._, ._, ._ },
-                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .lead(.none, .tmp0, 8), ._, ._ },
+                        .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memia(.src1, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._ae, .cmov, .tmp1p, .tmp2p, ._, ._ },
+                        .{ ._, ._, .lea, .tmp2p, .memia(.dst0, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp3d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .@"test", .tmp0p, .tmp0p, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any_scalar_unsigned_int, .any_scalar_unsigned_int },
                     .patterns = &.{
-                        .{ .src = .{ .to_gpr, .to_gpr } },
+                        .{ .src = .{ .to_mem, .to_mem } },
                     },
-                    .dst_temps = .{.{ .rc = .general_purpose }},
+                    .extra_temps = .{
+                        .{ .type = .isize, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .isize, .kind = .{ .reg = .rsi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rdi } },
+                        .{ .type = .u64, .kind = .{ .reg = .rcx } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{.mem},
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, .i_, .mul, .dst0p, .src1p, .sa(.none, .sub_src0_elem_size), ._ },
-                        .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ },
+                        .{ .@"0:", ._, .mov, .tmp1d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
+                        .{ ._, ._c, .cl, ._, ._, ._, ._ },
+                        .{ .@"1:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .sbb, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ },
+                        .{ ._, ._, .lea, .tmp0p, .lead(.none, .tmp0, 8), ._, ._ },
+                        .{ ._, ._c, .de, .tmp1d, ._, ._, ._ },
+                        .{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memia(.src0, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._nae, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._, .lea, .tmp1p, .memia(.src1, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ .@"1:", ._, .lea, .tmp2p, .memia(.dst0, .tmp0, .add_size_sub_elem_size), ._, ._ },
+                        .{ ._, ._, .mov, .tmp3d, .sa(.none, .add_src0_elem_size_div_8), ._, ._ },
+                        .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ },
+                        .{ ._, ._, .@"test", .tmp0p, .tmp0p, ._, ._ },
+                        .{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
                     } },
                 } }) catch |err| switch (err) {
                     error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
@@ -2788,10 +7951,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         ops[1].tracking(cg),
                     }),
                     else => |e| return e,
-                } else {
-                    // hack around Sema OPV bugs
-                    res[0] = ops[0];
-                }
+                };
                 try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg);
             },
             .alloc => if (use_old) try cg.airAlloc(inst) else {
@@ -15760,13 +20920,13 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void {
         const src_mcv = try self.resolveInst(ty_op.operand);
         if (dst_ty.isVector(zcu)) {
             const max_abi_size = @max(dst_abi_size, src_abi_size);
-            if (max_abi_size > self.vectorSize(.int)) break :result null;
             const has_avx = self.hasFeature(.avx);
 
             const dst_elem_abi_size = dst_ty.childType(zcu).abiSize(zcu);
             const src_elem_abi_size = src_ty.childType(zcu).abiSize(zcu);
             switch (std.math.order(dst_elem_abi_size, src_elem_abi_size)) {
                 .lt => {
+                    if (max_abi_size > self.vectorSize(.int)) break :result null;
                     const mir_tag: Mir.Inst.FixedTag = switch (dst_elem_abi_size) {
                         else => break :result null,
                         1 => switch (src_elem_abi_size) {
@@ -15823,6 +20983,7 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void {
                     break :result dst_mcv;
                 },
                 .gt => if (self.hasFeature(.sse4_1)) {
+                    if (max_abi_size > self.vectorSize(.int)) break :result null;
                     const mir_tag: Mir.Inst.FixedTag = .{ switch (dst_elem_abi_size) {
                         else => break :result null,
                         2 => if (has_avx) .vp_w else .p_w,
@@ -32296,9 +37457,8 @@ fn limitImmediateType(self: *CodeGen, operand: Air.Inst.Ref, comptime T: type) !
     return mcv;
 }
 
-fn genTypedValue(self: *CodeGen, val: Value) InnerError!MCValue {
-    const pt = self.pt;
-    return switch (try codegen.genTypedValue(self.bin_file, pt, self.src_loc, val, self.target.*)) {
+fn genResult(self: *CodeGen, res: codegen.GenResult) InnerError!MCValue {
+    return switch (res) {
         .mcv => |mcv| switch (mcv) {
             .none => .none,
             .undef => .undef,
@@ -32315,6 +37475,14 @@ fn genTypedValue(self: *CodeGen, val: Value) InnerError!MCValue {
     };
 }
 
+fn genTypedValue(self: *CodeGen, val: Value) InnerError!MCValue {
+    return self.genResult(try codegen.genTypedValue(self.bin_file, self.pt, self.src_loc, val, self.target.*));
+}
+
+fn lowerUav(self: *CodeGen, val: Value) InnerError!MCValue {
+    return self.genResult(try self.bin_file.lowerUav(self.pt, val.toIntern(), .none, self.src_loc));
+}
+
 const CallMCValues = struct {
     args: []MCValue,
     return_value: InstTracking,
@@ -34106,6 +39274,10 @@ fn tempFromValue(cg: *CodeGen, value: Value) !Temp {
     return cg.tempInit(value.typeOf(cg.pt.zcu), try cg.genTypedValue(value));
 }
 
+fn tempMemFromValue(cg: *CodeGen, value: Value) !Temp {
+    return cg.tempInit(value.typeOf(cg.pt.zcu), try cg.lowerUav(value));
+}
+
 fn tempFromOperand(
     cg: *CodeGen,
     inst: Air.Inst.Index,
@@ -34364,7 +39536,10 @@ const Select = struct {
         any_bool_vec,
         any_int,
         any_signed_int,
+        any_unsigned_int,
         any_scalar_int,
+        any_scalar_signed_int,
+        any_scalar_unsigned_int,
         any_float,
         po2_any,
         bool_vec: Memory.Size,
@@ -34376,10 +39551,14 @@ const Select = struct {
         multiple_size: Memory.Size,
         int: Memory.Size,
         scalar_int_is: Memory.Size,
+        scalar_signed_int_is: Memory.Size,
+        scalar_unsigned_int_is: Memory.Size,
         scalar_int: OfIsSizes,
-        scalar_signed_int: Memory.Size,
-        scalar_unsigned_int: Memory.Size,
+        scalar_signed_int: OfIsSizes,
+        scalar_unsigned_int: OfIsSizes,
         multiple_scalar_int: OfIsSizes,
+        multiple_scalar_signed_int: OfIsSizes,
+        multiple_scalar_unsigned_int: OfIsSizes,
         scalar_remainder_int: OfIsSizes,
         float: Memory.Size,
         scalar_any_float: Memory.Size,
@@ -34413,7 +39592,10 @@ const Select = struct {
                 .any_bool_vec => ty.isVector(zcu) and ty.childType(zcu).toIntern() == .bool_type,
                 .any_int => cg.intInfo(ty) != null,
                 .any_signed_int => if (cg.intInfo(ty)) |int_info| int_info.signedness == .signed else false,
+                .any_unsigned_int => if (cg.intInfo(ty)) |int_info| int_info.signedness == .unsigned else false,
                 .any_scalar_int => cg.intInfo(ty.scalarType(zcu)) != null,
+                .any_scalar_signed_int => if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .signed else false,
+                .any_scalar_unsigned_int => if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .unsigned else false,
                 .any_float => ty.isRuntimeFloat(),
                 .po2_any => std.math.isPowerOfTwo(ty.abiSize(zcu)),
                 .bool_vec => |size| ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and
@@ -34436,18 +39618,30 @@ const Select = struct {
                     size.bitSize(cg.target) >= int_info.bits
                 else
                     false,
-                .scalar_int => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) >= cg.unalignedSize(ty) and
-                    if (cg.intInfo(ty.scalarType(zcu))) |int_info| of_is.is.bitSize(cg.target) >= int_info.bits else false,
-                .scalar_signed_int => |size| if (cg.intInfo(ty.scalarType(zcu))) |int_info| switch (int_info.signedness) {
+                .scalar_signed_int_is => |size| if (cg.intInfo(ty.scalarType(zcu))) |int_info| switch (int_info.signedness) {
                     .signed => size.bitSize(cg.target) >= int_info.bits,
                     .unsigned => false,
                 } else false,
-                .scalar_unsigned_int => |size| if (cg.intInfo(ty.scalarType(zcu))) |int_info| switch (int_info.signedness) {
+                .scalar_unsigned_int_is => |size| if (cg.intInfo(ty.scalarType(zcu))) |int_info| switch (int_info.signedness) {
                     .signed => false,
                     .unsigned => size.bitSize(cg.target) >= int_info.bits,
                 } else false,
+                .scalar_int => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) >= cg.unalignedSize(ty) and
+                    if (cg.intInfo(ty.scalarType(zcu))) |int_info| of_is.is.bitSize(cg.target) >= int_info.bits else false,
+                .scalar_signed_int => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) >= cg.unalignedSize(ty) and
+                    if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .signed and
+                    of_is.is.bitSize(cg.target) >= int_info.bits else false,
+                .scalar_unsigned_int => |of_is| @divExact(of_is.of.bitSize(cg.target), 8) >= cg.unalignedSize(ty) and
+                    if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .unsigned and
+                    of_is.is.bitSize(cg.target) >= int_info.bits else false,
                 .multiple_scalar_int => |of_is| ty.abiSize(zcu) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and
                     if (cg.intInfo(ty.scalarType(zcu))) |int_info| of_is.is.bitSize(cg.target) >= int_info.bits else false,
+                .multiple_scalar_signed_int => |of_is| ty.abiSize(zcu) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and
+                    if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .signed and
+                    of_is.is.bitSize(cg.target) >= int_info.bits else false,
+                .multiple_scalar_unsigned_int => |of_is| ty.abiSize(zcu) % @divExact(of_is.of.bitSize(cg.target), 8) == 0 and
+                    if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .unsigned and
+                    of_is.is.bitSize(cg.target) >= int_info.bits else false,
                 .scalar_remainder_int => |of_is| if (cg.intInfo(ty.scalarType(zcu))) |int_info|
                     of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1
                 else
@@ -34548,6 +39742,10 @@ const Select = struct {
             to_x87,
             mut_x87,
             to_mut_x87,
+            mmx,
+            to_mmx,
+            mut_mmx,
+            to_mut_mmx,
             mm,
             to_mm,
             mut_mm,
@@ -34611,6 +39809,17 @@ const Select = struct {
                         else => false,
                     },
                     .to_x87, .to_mut_x87 => true,
+                    .mmx => switch (temp.tracking(cg).short) {
+                        .register => |reg| reg.class() == .mmx,
+                        .register_offset => |reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0,
+                        else => false,
+                    },
+                    .mut_mmx => temp.isMut(cg) and switch (temp.tracking(cg).short) {
+                        .register => |reg| reg.class() == .mmx,
+                        .register_offset => |reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0,
+                        else => false,
+                    },
+                    .to_mmx, .to_mut_mmx => true,
                     .mm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 8 and switch (temp.tracking(cg).short) {
                         .register => |reg| reg.class() == .mmx,
                         .register_offset => |reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0,
@@ -34668,8 +39877,8 @@ const Select = struct {
                     .mut_gpr, .to_mut_gpr => try temp.toRegClass(true, .general_purpose, cg),
                     .x87, .to_x87 => try temp.toRegClass(false, .x87, cg),
                     .mut_x87, .to_mut_x87 => try temp.toRegClass(true, .x87, cg),
-                    .mm, .to_mm => try temp.toRegClass(false, .mmx, cg),
-                    .mut_mm, .to_mut_mm => try temp.toRegClass(true, .mmx, cg),
+                    .mmx, .to_mmx, .mm, .to_mm => try temp.toRegClass(false, .mmx, cg),
+                    .mut_mmx, .to_mut_mmx, .mut_mm, .to_mut_mm => try temp.toRegClass(true, .mmx, cg),
                     .sse, .to_sse, .xmm, .to_xmm, .ymm, .to_ymm => try temp.toRegClass(false, .sse, cg),
                     .mut_sse, .to_mut_sse, .mut_xmm, .to_mut_xmm, .mut_ymm, .to_mut_ymm => try temp.toRegClass(true, .sse, cg),
                 };
@@ -34701,7 +39910,7 @@ const Select = struct {
             umax_mem: ConstInfo,
             symbol: *const struct { lib: ?[]const u8 = null, name: []const u8 },
 
-            const ConstInfo = struct { ref: Select.Operand.Ref, vectorize_to: Memory.Size = .none };
+            const ConstInfo = struct { ref: Select.Operand.Ref, vectorize_to: ?Memory.Size = null };
 
             fn finish(kind: Kind, temp: Temp, s: *const Select) void {
                 switch (kind) {
@@ -34758,10 +39967,10 @@ const Select = struct {
                         else => .{ null, ty },
                         .vector_type => |vector_type| .{ vector_type.len, .fromInterned(vector_type.child) },
                     };
-                    const res_vector_len: ?u32 = if (const_info.vectorize_to != .none)
-                        @intCast(@divExact(@divExact(const_info.vectorize_to.bitSize(cg.target), 8), scalar_ty.abiSize(pt.zcu)))
-                    else
-                        vector_len;
+                    const res_vector_len: ?u32 = if (const_info.vectorize_to) |vectorize_to| switch (vectorize_to) {
+                        .none => null,
+                        else => @intCast(@divExact(@divExact(vectorize_to.bitSize(cg.target), 8), scalar_ty.abiSize(pt.zcu))),
+                    } else vector_len;
                     const res_scalar_ty, const res_scalar_val: Value = res_scalar: switch (scalar_ty.toIntern()) {
                         .bool_type => .{
                             scalar_ty,
@@ -34813,7 +40022,7 @@ const Select = struct {
                         })).toIntern(),
                         .storage = .{ .repeated_elem = res_scalar_val.toIntern() },
                     } })) else res_scalar_val;
-                    return try cg.tempFromValue(res_val);
+                    return try cg.tempMemFromValue(res_val);
                 },
                 .symbol => |symbol| if (cg.bin_file.cast(.elf)) |elf_file| try cg.tempInit(spec.type, .{ .lea_symbol = .{
                     .sym_index = try elf_file.getGlobalSymbol(symbol.name, symbol.lib),
@@ -34855,13 +40064,13 @@ const Select = struct {
             mem,
         };
         const Adjust = packed struct(u8) {
-            factor: i2,
-            scale: Memory.Scale,
-            amount: enum(u4) {
+            sign: enum(u1) { neg, pos },
+            lhs: enum(u4) {
                 none,
                 ptr_size,
                 ptr_bit_size,
                 size,
+                size_sub_elem_size,
                 src0_unaligned_size,
                 bit_size,
                 src0_bit_size,
@@ -34874,35 +40083,41 @@ const Select = struct {
                 smax,
                 umax,
             },
-
-            const none: Adjust = .{ .factor = 0, .scale = .@"1", .amount = .none };
-            const sub_ptr_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .ptr_size };
-            const add_ptr_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .ptr_bit_size };
-            const add_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .size };
-            const sub_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .size };
-            const add_src0_unaligned_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_unaligned_size };
-            const sub_src0_unaligned_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_unaligned_size };
-            const add_2_bit_size: Adjust = .{ .factor = 1, .scale = .@"2", .amount = .bit_size };
-            const add_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .bit_size };
-            const sub_bit_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .bit_size };
-            const add_src0_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_bit_size };
-            const sub_src0_bit_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_bit_size };
-            const add_8_len: Adjust = .{ .factor = 1, .scale = .@"8", .amount = .len };
-            const add_4_len: Adjust = .{ .factor = 1, .scale = .@"4", .amount = .len };
-            const add_3_len: Adjust = .{ .factor = 1, .scale = .@"3", .amount = .len };
-            const add_2_len: Adjust = .{ .factor = 1, .scale = .@"2", .amount = .len };
-            const add_len: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .len };
-            const sub_len: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .len };
-            const add_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_elem_size };
-            const add_2_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"2", .amount = .src0_elem_size };
-            const add_4_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"4", .amount = .src0_elem_size };
-            const add_8_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"8", .amount = .src0_elem_size };
-            const sub_src0_elem_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_elem_size };
-            const add_src0_elem_size_times_src1: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_elem_size_times_src1 };
-            const sub_src0_elem_size_times_src1: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_elem_size_times_src1 };
-            const add_log2_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .log2_src0_elem_size };
-            const add_elem_limbs: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .elem_limbs };
-            const add_umax: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .umax };
+            op: enum(u1) { mul, div },
+            rhs: Memory.Scale,
+
+            const none: Adjust = .{ .sign = .pos, .lhs = .none, .op = .mul, .rhs = .@"1" };
+            const sub_ptr_size: Adjust = .{ .sign = .neg, .lhs = .ptr_size, .op = .mul, .rhs = .@"1" };
+            const add_ptr_bit_size: Adjust = .{ .sign = .pos, .lhs = .ptr_bit_size, .op = .mul, .rhs = .@"1" };
+            const add_size: Adjust = .{ .sign = .pos, .lhs = .size, .op = .mul, .rhs = .@"1" };
+            const add_size_div_8: Adjust = .{ .sign = .pos, .lhs = .size, .op = .div, .rhs = .@"8" };
+            const sub_size_div_8: Adjust = .{ .sign = .neg, .lhs = .size, .op = .div, .rhs = .@"8" };
+            const sub_size: Adjust = .{ .sign = .neg, .lhs = .size, .op = .mul, .rhs = .@"1" };
+            const add_size_sub_elem_size: Adjust = .{ .sign = .pos, .lhs = .size_sub_elem_size, .op = .mul, .rhs = .@"1" };
+            const add_src0_unaligned_size: Adjust = .{ .sign = .pos, .lhs = .src0_unaligned_size, .op = .mul, .rhs = .@"1" };
+            const sub_src0_unaligned_size: Adjust = .{ .sign = .neg, .lhs = .src0_unaligned_size, .op = .mul, .rhs = .@"1" };
+            const add_2_bit_size: Adjust = .{ .sign = .pos, .lhs = .bit_size, .op = .mul, .rhs = .@"2" };
+            const add_bit_size: Adjust = .{ .sign = .pos, .lhs = .bit_size, .op = .mul, .rhs = .@"1" };
+            const sub_bit_size: Adjust = .{ .sign = .neg, .lhs = .bit_size, .op = .mul, .rhs = .@"1" };
+            const add_src0_bit_size: Adjust = .{ .sign = .pos, .lhs = .src0_bit_size, .op = .mul, .rhs = .@"1" };
+            const sub_src0_bit_size: Adjust = .{ .sign = .neg, .lhs = .src0_bit_size, .op = .mul, .rhs = .@"1" };
+            const add_8_len: Adjust = .{ .sign = .pos, .lhs = .len, .op = .mul, .rhs = .@"8" };
+            const add_4_len: Adjust = .{ .sign = .pos, .lhs = .len, .op = .mul, .rhs = .@"4" };
+            const add_3_len: Adjust = .{ .sign = .pos, .lhs = .len, .op = .mul, .rhs = .@"3" };
+            const add_2_len: Adjust = .{ .sign = .pos, .lhs = .len, .op = .mul, .rhs = .@"2" };
+            const add_len: Adjust = .{ .sign = .pos, .lhs = .len, .op = .mul, .rhs = .@"1" };
+            const sub_len: Adjust = .{ .sign = .neg, .lhs = .len, .op = .mul, .rhs = .@"1" };
+            const add_src0_elem_size: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size, .op = .mul, .rhs = .@"1" };
+            const add_2_src0_elem_size: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size, .op = .mul, .rhs = .@"2" };
+            const add_4_src0_elem_size: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size, .op = .mul, .rhs = .@"4" };
+            const add_8_src0_elem_size: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size, .op = .mul, .rhs = .@"8" };
+            const add_src0_elem_size_div_8: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size, .op = .div, .rhs = .@"8" };
+            const sub_src0_elem_size: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size, .op = .mul, .rhs = .@"1" };
+            const add_src0_elem_size_times_src1: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size_times_src1, .op = .mul, .rhs = .@"1" };
+            const sub_src0_elem_size_times_src1: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size_times_src1, .op = .mul, .rhs = .@"1" };
+            const add_log2_src0_elem_size: Adjust = .{ .sign = .pos, .lhs = .log2_src0_elem_size, .op = .mul, .rhs = .@"1" };
+            const add_elem_limbs: Adjust = .{ .sign = .pos, .lhs = .elem_limbs, .op = .mul, .rhs = .@"1" };
+            const add_umax: Adjust = .{ .sign = .pos, .lhs = .umax, .op = .mul, .rhs = .@"1" };
         };
         const Ref = enum(u4) {
             tmp0,
@@ -35350,11 +40565,15 @@ const Select = struct {
             const UnsignedImm = @Type(.{
                 .int = .{ .signedness = .unsigned, .bits = @typeInfo(SignedImm).int.bits },
             });
-            return @as(i5, op.adjust.factor) * op.adjust.scale.toFactor() * @as(SignedImm, switch (op.adjust.amount) {
+            const lhs: SignedImm = lhs: switch (op.adjust.lhs) {
                 .none => 0,
                 .ptr_size => @divExact(s.cg.target.ptrBitWidth(), 8),
                 .ptr_bit_size => s.cg.target.ptrBitWidth(),
                 .size => @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu)),
+                .size_sub_elem_size => {
+                    const ty = op.base.ref.deref(s).typeOf(s.cg);
+                    break :lhs @intCast(ty.abiSize(s.cg.pt.zcu) - ty.elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu));
+                },
                 .src0_unaligned_size => @intCast(s.cg.unalignedSize(Select.Operand.Ref.src0.deref(s).typeOf(s.cg))),
                 .bit_size => @intCast(op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu)),
                 .src0_bit_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu)),
@@ -35376,7 +40595,20 @@ const Select = struct {
                 .umax => @bitCast(@as(UnsignedImm, std.math.maxInt(UnsignedImm)) >> @truncate(
                     -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu),
                 )),
-            }) + op.imm;
+            };
+            const rhs = op.adjust.rhs.toLog2();
+            const res = res: switch (op.adjust.op) {
+                .mul => {
+                    const res = @shlWithOverflow(lhs, rhs);
+                    assert(res[1] == 0);
+                    break :res res[0];
+                },
+                .div => @shrExact(lhs, rhs),
+            };
+            return switch (op.adjust.sign) {
+                .neg => op.imm - res,
+                .pos => op.imm + res,
+            };
         }
 
         fn lower(op: Select.Operand, s: *Select) !CodeGen.Operand {
src/codegen/c/Type.zig
@@ -1437,6 +1437,246 @@ pub const Pool = struct {
                 return pool.fromFields(allocator, .@"struct", &fields, kind);
             },
 
+            .vector_16_i8_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .i8,
+                    .len = 16,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.i8.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_32_i8_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .i8,
+                    .len = 32,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.i8.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_16_u8_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .u8,
+                    .len = 16,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.u8.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_32_u8_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .u8,
+                    .len = 32,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.u8.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_8_i16_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .i16,
+                    .len = 8,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.i16.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_16_i16_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .i16,
+                    .len = 16,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.i16.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_8_u16_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .u16,
+                    .len = 8,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.u16.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_16_u16_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .u16,
+                    .len = 16,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.u16.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_4_i32_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .i32,
+                    .len = 4,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.i32.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_8_i32_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .i32,
+                    .len = 8,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.i32.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_4_u32_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .u32,
+                    .len = 4,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.u32.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_8_u32_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .u32,
+                    .len = 8,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.u32.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_2_i64_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .i64,
+                    .len = 2,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.i64.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_4_i64_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .i64,
+                    .len = 4,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.i64.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_2_u64_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .u64,
+                    .len = 2,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.u64.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
+            .vector_4_u64_type => {
+                const vector_ctype = try pool.getVector(allocator, .{
+                    .elem_ctype = .u64,
+                    .len = 4,
+                });
+                if (!kind.isParameter()) return vector_ctype;
+                var fields = [_]Info.Field{
+                    .{
+                        .name = .{ .index = .array },
+                        .ctype = vector_ctype,
+                        .alignas = AlignAs.fromAbiAlignment(Type.u64.abiAlignment(zcu)),
+                    },
+                };
+                return pool.fromFields(allocator, .@"struct", &fields, kind);
+            },
             .vector_4_f16_type => {
                 const vector_ctype = try pool.getVector(allocator, .{
                     .elem_ctype = .f16,
src/Air.zig
@@ -984,6 +984,22 @@ pub const Inst = struct {
         single_const_pointer_to_comptime_int_type = @intFromEnum(InternPool.Index.single_const_pointer_to_comptime_int_type),
         slice_const_u8_type = @intFromEnum(InternPool.Index.slice_const_u8_type),
         slice_const_u8_sentinel_0_type = @intFromEnum(InternPool.Index.slice_const_u8_sentinel_0_type),
+        vector_16_i8_type = @intFromEnum(InternPool.Index.vector_16_i8_type),
+        vector_32_i8_type = @intFromEnum(InternPool.Index.vector_32_i8_type),
+        vector_16_u8_type = @intFromEnum(InternPool.Index.vector_16_u8_type),
+        vector_32_u8_type = @intFromEnum(InternPool.Index.vector_32_u8_type),
+        vector_8_i16_type = @intFromEnum(InternPool.Index.vector_8_i16_type),
+        vector_16_i16_type = @intFromEnum(InternPool.Index.vector_16_i16_type),
+        vector_8_u16_type = @intFromEnum(InternPool.Index.vector_8_u16_type),
+        vector_16_u16_type = @intFromEnum(InternPool.Index.vector_16_u16_type),
+        vector_4_i32_type = @intFromEnum(InternPool.Index.vector_4_i32_type),
+        vector_8_i32_type = @intFromEnum(InternPool.Index.vector_8_i32_type),
+        vector_4_u32_type = @intFromEnum(InternPool.Index.vector_4_u32_type),
+        vector_8_u32_type = @intFromEnum(InternPool.Index.vector_8_u32_type),
+        vector_2_i64_type = @intFromEnum(InternPool.Index.vector_2_i64_type),
+        vector_4_i64_type = @intFromEnum(InternPool.Index.vector_4_i64_type),
+        vector_2_u64_type = @intFromEnum(InternPool.Index.vector_2_u64_type),
+        vector_4_u64_type = @intFromEnum(InternPool.Index.vector_4_u64_type),
         vector_4_f16_type = @intFromEnum(InternPool.Index.vector_4_f16_type),
         vector_8_f16_type = @intFromEnum(InternPool.Index.vector_8_f16_type),
         vector_4_f32_type = @intFromEnum(InternPool.Index.vector_4_f32_type),
src/InternPool.zig
@@ -4567,6 +4567,22 @@ pub const Index = enum(u32) {
     slice_const_u8_type,
     slice_const_u8_sentinel_0_type,
 
+    vector_16_i8_type,
+    vector_32_i8_type,
+    vector_16_u8_type,
+    vector_32_u8_type,
+    vector_8_i16_type,
+    vector_16_i16_type,
+    vector_8_u16_type,
+    vector_16_u16_type,
+    vector_4_i32_type,
+    vector_8_i32_type,
+    vector_4_u32_type,
+    vector_8_u32_type,
+    vector_2_i64_type,
+    vector_4_i64_type,
+    vector_2_u64_type,
+    vector_4_u64_type,
     vector_4_f16_type,
     vector_8_f16_type,
     vector_4_f32_type,
@@ -5064,6 +5080,38 @@ pub const static_keys = [_]Key{
         },
     } },
 
+    // @Vector(16, i8)
+    .{ .vector_type = .{ .len = 16, .child = .i8_type } },
+    // @Vector(32, i8)
+    .{ .vector_type = .{ .len = 32, .child = .i8_type } },
+    // @Vector(16, u8)
+    .{ .vector_type = .{ .len = 16, .child = .u8_type } },
+    // @Vector(32, u8)
+    .{ .vector_type = .{ .len = 32, .child = .u8_type } },
+    // @Vector(8, i16)
+    .{ .vector_type = .{ .len = 8, .child = .i16_type } },
+    // @Vector(16, i16)
+    .{ .vector_type = .{ .len = 16, .child = .i16_type } },
+    // @Vector(8, u16)
+    .{ .vector_type = .{ .len = 8, .child = .u16_type } },
+    // @Vector(16, u16)
+    .{ .vector_type = .{ .len = 16, .child = .u16_type } },
+    // @Vector(4, i32)
+    .{ .vector_type = .{ .len = 4, .child = .i32_type } },
+    // @Vector(8, i32)
+    .{ .vector_type = .{ .len = 8, .child = .i32_type } },
+    // @Vector(4, u32)
+    .{ .vector_type = .{ .len = 4, .child = .u32_type } },
+    // @Vector(8, u32)
+    .{ .vector_type = .{ .len = 8, .child = .u32_type } },
+    // @Vector(2, i64)
+    .{ .vector_type = .{ .len = 2, .child = .i64_type } },
+    // @Vector(4, i64)
+    .{ .vector_type = .{ .len = 4, .child = .i64_type } },
+    // @Vector(2, u64)
+    .{ .vector_type = .{ .len = 2, .child = .u64_type } },
+    // @Vector(8, u64)
+    .{ .vector_type = .{ .len = 4, .child = .u64_type } },
     // @Vector(4, f16)
     .{ .vector_type = .{ .len = 4, .child = .f16_type } },
     // @Vector(8, f16)
@@ -11703,6 +11751,22 @@ pub fn typeOf(ip: *const InternPool, index: Index) Index {
         .single_const_pointer_to_comptime_int_type,
         .slice_const_u8_type,
         .slice_const_u8_sentinel_0_type,
+        .vector_16_i8_type,
+        .vector_32_i8_type,
+        .vector_16_u8_type,
+        .vector_32_u8_type,
+        .vector_8_i16_type,
+        .vector_16_i16_type,
+        .vector_8_u16_type,
+        .vector_16_u16_type,
+        .vector_4_i32_type,
+        .vector_8_i32_type,
+        .vector_4_u32_type,
+        .vector_8_u32_type,
+        .vector_2_i64_type,
+        .vector_4_i64_type,
+        .vector_2_u64_type,
+        .vector_4_u64_type,
         .vector_4_f16_type,
         .vector_8_f16_type,
         .vector_4_f32_type,
@@ -12026,6 +12090,22 @@ pub fn zigTypeTag(ip: *const InternPool, index: Index) std.builtin.TypeId {
         .slice_const_u8_sentinel_0_type,
         => .pointer,
 
+        .vector_16_i8_type,
+        .vector_32_i8_type,
+        .vector_16_u8_type,
+        .vector_32_u8_type,
+        .vector_8_i16_type,
+        .vector_16_i16_type,
+        .vector_8_u16_type,
+        .vector_16_u16_type,
+        .vector_4_i32_type,
+        .vector_8_i32_type,
+        .vector_4_u32_type,
+        .vector_8_u32_type,
+        .vector_2_i64_type,
+        .vector_4_i64_type,
+        .vector_2_u64_type,
+        .vector_4_u64_type,
         .vector_4_f16_type,
         .vector_8_f16_type,
         .vector_4_f32_type,
src/Sema.zig
@@ -36611,6 +36611,22 @@ pub fn typeHasOnePossibleValue(sema: *Sema, ty: Type) CompileError!?Value {
         .single_const_pointer_to_comptime_int_type,
         .slice_const_u8_type,
         .slice_const_u8_sentinel_0_type,
+        .vector_16_i8_type,
+        .vector_32_i8_type,
+        .vector_16_u8_type,
+        .vector_32_u8_type,
+        .vector_8_i16_type,
+        .vector_16_i16_type,
+        .vector_8_u16_type,
+        .vector_16_u16_type,
+        .vector_4_i32_type,
+        .vector_8_i32_type,
+        .vector_4_u32_type,
+        .vector_8_u32_type,
+        .vector_2_i64_type,
+        .vector_4_i64_type,
+        .vector_2_u64_type,
+        .vector_4_u64_type,
         .vector_4_f16_type,
         .vector_8_f16_type,
         .vector_4_f32_type,
src/Type.zig
@@ -4175,6 +4175,22 @@ pub const single_const_pointer_to_comptime_int: Type = .{
 };
 pub const slice_const_u8_sentinel_0: Type = .{ .ip_index = .slice_const_u8_sentinel_0_type };
 
+pub const vector_16_i8: Type = .{ .ip_index = .vector_16_i8_type };
+pub const vector_32_i8: Type = .{ .ip_index = .vector_32_i8_type };
+pub const vector_16_u8: Type = .{ .ip_index = .vector_16_u8_type };
+pub const vector_32_u8: Type = .{ .ip_index = .vector_32_u8_type };
+pub const vector_8_i16: Type = .{ .ip_index = .vector_8_i16_type };
+pub const vector_16_i16: Type = .{ .ip_index = .vector_16_i16_type };
+pub const vector_8_u16: Type = .{ .ip_index = .vector_8_u16_type };
+pub const vector_16_u16: Type = .{ .ip_index = .vector_16_u16_type };
+pub const vector_4_i32: Type = .{ .ip_index = .vector_4_i32_type };
+pub const vector_8_i32: Type = .{ .ip_index = .vector_8_i32_type };
+pub const vector_4_u32: Type = .{ .ip_index = .vector_4_u32_type };
+pub const vector_8_u32: Type = .{ .ip_index = .vector_8_u32_type };
+pub const vector_2_i64: Type = .{ .ip_index = .vector_2_i64_type };
+pub const vector_4_i64: Type = .{ .ip_index = .vector_4_i64_type };
+pub const vector_2_u64: Type = .{ .ip_index = .vector_2_u64_type };
+pub const vector_4_u64: Type = .{ .ip_index = .vector_4_u64_type };
 pub const vector_4_f16: Type = .{ .ip_index = .vector_4_f16_type };
 pub const vector_8_f16: Type = .{ .ip_index = .vector_8_f16_type };
 pub const vector_4_f32: Type = .{ .ip_index = .vector_4_f32_type };
test/behavior/x86_64/math.zig
@@ -1,11 +1,11 @@
 const builtin = @import("builtin");
 const inf = math.inf;
 const math = std.math;
-const max = math.floatMax;
-const min = math.floatMin;
+const fmax = math.floatMax;
+const fmin = math.floatMin;
 const nan = math.nan;
 const std = @import("std");
-const trueMin = math.floatTrueMin;
+const tmin = math.floatTrueMin;
 
 const Gpr = switch (builtin.cpu.arch) {
     else => unreachable,
@@ -545,96 +545,96 @@ fn Unary(comptime op: anytype) type {
         fn testFloatTypes() !void {
             try testArgs(f16, -nan(f16));
             try testArgs(f16, -inf(f16));
-            try testArgs(f16, -max(f16));
+            try testArgs(f16, -fmax(f16));
             try testArgs(f16, -10.0);
             try testArgs(f16, -1.0);
             try testArgs(f16, -0.1);
-            try testArgs(f16, -min(f16));
-            try testArgs(f16, -trueMin(f16));
+            try testArgs(f16, -fmin(f16));
+            try testArgs(f16, -tmin(f16));
             try testArgs(f16, -0.0);
             try testArgs(f16, 0.0);
-            try testArgs(f16, trueMin(f16));
-            try testArgs(f16, min(f16));
+            try testArgs(f16, tmin(f16));
+            try testArgs(f16, fmin(f16));
             try testArgs(f16, 0.1);
             try testArgs(f16, 1.0);
             try testArgs(f16, 10.0);
-            try testArgs(f16, max(f16));
+            try testArgs(f16, fmax(f16));
             try testArgs(f16, inf(f16));
             try testArgs(f16, nan(f16));
 
             try testArgs(f32, -nan(f32));
             try testArgs(f32, -inf(f32));
-            try testArgs(f32, -max(f32));
+            try testArgs(f32, -fmax(f32));
             try testArgs(f32, -10.0);
             try testArgs(f32, -1.0);
             try testArgs(f32, -0.1);
-            try testArgs(f32, -min(f32));
-            try testArgs(f32, -trueMin(f32));
+            try testArgs(f32, -fmin(f32));
+            try testArgs(f32, -tmin(f32));
             try testArgs(f32, -0.0);
             try testArgs(f32, 0.0);
-            try testArgs(f32, trueMin(f32));
-            try testArgs(f32, min(f32));
+            try testArgs(f32, tmin(f32));
+            try testArgs(f32, fmin(f32));
             try testArgs(f32, 0.1);
             try testArgs(f32, 1.0);
             try testArgs(f32, 10.0);
-            try testArgs(f32, max(f32));
+            try testArgs(f32, fmax(f32));
             try testArgs(f32, inf(f32));
             try testArgs(f32, nan(f32));
 
             try testArgs(f64, -nan(f64));
             try testArgs(f64, -inf(f64));
-            try testArgs(f64, -max(f64));
+            try testArgs(f64, -fmax(f64));
             try testArgs(f64, -10.0);
             try testArgs(f64, -1.0);
             try testArgs(f64, -0.1);
-            try testArgs(f64, -min(f64));
-            try testArgs(f64, -trueMin(f64));
+            try testArgs(f64, -fmin(f64));
+            try testArgs(f64, -tmin(f64));
             try testArgs(f64, -0.0);
             try testArgs(f64, 0.0);
-            try testArgs(f64, trueMin(f64));
-            try testArgs(f64, min(f64));
+            try testArgs(f64, tmin(f64));
+            try testArgs(f64, fmin(f64));
             try testArgs(f64, 0.1);
             try testArgs(f64, 1.0);
             try testArgs(f64, 10.0);
-            try testArgs(f64, max(f64));
+            try testArgs(f64, fmax(f64));
             try testArgs(f64, inf(f64));
             try testArgs(f64, nan(f64));
 
             try testArgs(f80, -nan(f80));
             try testArgs(f80, -inf(f80));
-            try testArgs(f80, -max(f80));
+            try testArgs(f80, -fmax(f80));
             try testArgs(f80, -10.0);
             try testArgs(f80, -1.0);
             try testArgs(f80, -0.1);
-            try testArgs(f80, -min(f80));
-            try testArgs(f80, -trueMin(f80));
+            try testArgs(f80, -fmin(f80));
+            try testArgs(f80, -tmin(f80));
             try testArgs(f80, -0.0);
             try testArgs(f80, 0.0);
-            try testArgs(f80, trueMin(f80));
-            try testArgs(f80, min(f80));
+            try testArgs(f80, tmin(f80));
+            try testArgs(f80, fmin(f80));
             try testArgs(f80, 0.1);
             try testArgs(f80, 1.0);
             try testArgs(f80, 10.0);
-            try testArgs(f80, max(f80));
+            try testArgs(f80, fmax(f80));
             try testArgs(f80, inf(f80));
             try testArgs(f80, nan(f80));
 
             try testArgs(f128, -nan(f128));
             try testArgs(f128, -inf(f128));
-            try testArgs(f128, -max(f128));
+            try testArgs(f128, -fmax(f128));
             try testArgs(f128, -10.0);
             try testArgs(f128, -1.0);
             try testArgs(f128, -0.1);
-            try testArgs(f128, -min(f128));
-            try testArgs(f128, -trueMin(f128));
+            try testArgs(f128, -fmin(f128));
+            try testArgs(f128, -tmin(f128));
             try testArgs(f128, -0.0);
             try testArgs(f128, 0.0);
-            try testArgs(f128, trueMin(f128));
-            try testArgs(f128, min(f128));
+            try testArgs(f128, tmin(f128));
+            try testArgs(f128, fmin(f128));
             try testArgs(f128, 0.1);
             try testArgs(f128, 1.0);
             try testArgs(f128, 10.0);
-            try testArgs(f128, max(f128));
+            try testArgs(f128, fmax(f128));
             try testArgs(f128, inf(f128));
             try testArgs(f128, nan(f128));
         }
@@ -1391,26 +1391,51 @@ fn Binary(comptime op: anytype) type {
             );
         }
         fn testIntTypes() !void {
+            try testArgs(i8, 0x48, 0x6c);
             try testArgs(u8, 0xbb, 0x43);
+            try testArgs(i16, -0x0fdf, 0x302e);
             try testArgs(u16, 0xb8bf, 0x626d);
+            try testArgs(i32, -0x6280178f, 0x6802c034);
             try testArgs(u32, 0x80d7a2c6, 0xbff6a402);
+            try testArgs(i64, 0x0365a53b8ee0c987, -0x1bb6d3013500a7d2);
             try testArgs(u64, 0x71138bc6b4a38898, 0x1bc4043de9438c7b);
+            try testArgs(i128, 0x76d428c46cdeaa2ac43de8abffb22f6d, 0x427f7545abe434a12544fdbe2a012889);
             try testArgs(u128, 0xe05fc132ef2cd8affee00a907f0a851f, 0x29f912a72cfc6a7c6973426a9636da9a);
+            try testArgs(
+                i256,
+                -0x53d4148cee74ea43477a65b3daa7b8fdadcbf4508e793f4af113b8d8da5a7eb6,
+                -0x30dcbaf7b9b7a3df033694e6795444d842fb0b8f79bc18b3ea8a6b7ccad3ea91,
+            );
             try testArgs(
                 u256,
                 0xb7935f5c2f3b1ae7a422c0a7c446884294b7d5370bada307d2fe5a4c4284a999,
                 0x310e6e196ba4f143b8d285ca6addf7f3bb3344224aff221b27607a31e148be08,
             );
+            try testArgs(
+                i258,
+                -0x0eee283365108dbeea0bec82f5147418d8ffe86f9eed00e414b4eccd65c21239a,
+                -0x122c730073fc29a24cd6e3e6263566879bc5325d8566b8db31fcb4a76f7ab95eb,
+            );
             try testArgs(
                 u258,
                 0x186d5ddaab8cb8cb04e5b41e36f812e039d008baf49f12894c39e29a07796d800,
                 0x2072daba6ffad168826163eb136f6d28ca4360c8e7e5e41e29755e19e4753a4f5,
             );
+            try testArgs(
+                i495,
+                0x2fe6bc5448c55ce18252e2c9d44777505dfe63ff249a8027a6626c7d8dd9893fd5731e51474727be556f757facb586a4e04bbc0148c6c7ad692302f46fbd,
+                -0x016a358821ef8240172f3a08e8830c06e6bcf2225f5f4d41ed42b44d249385f55cc594e1278ecac31c73faed890e5054af1a561483bb1bb6fb1f753514cf,
+            );
             try testArgs(
                 u495,
                 0x6eaf4e252b3bf74b75bac59e0b43ca5326bad2a25b3fdb74a67ef132ac5e47d72eebc3316fb2351ee66c50dc5afb92a75cea9b0e35160652c7db39eeb158,
                 0x49fbed744a92b549d8c05bb3512c617d24dd824f3f69bdf3923bc326a75674b85f5b828d2566fab9c86f571d12c2a63c9164feb0d191d27905533d09622a,
             );
+            try testArgs(
+                i512,
+                -0x3a6876ca92775286c6e1504a64a9b8d56985bebf4a1b66539d404e0e96f24b226f70c4bcff295fdc2043b82513b2052dc45fd78f7e9e80e5b3e101757289f054,
+                0x5080c516a819bd32a0a5f0976441bbfbcf89e77684f1f10eb326aeb28e1f8d593278cff60fc99b8ffc87d8696882c64728dd3c322b7142803f4341f85a03bc10,
+            );
             try testArgs(
                 u512,
                 0xe5b1fedca3c77db765e517aabd05ffc524a3a8aff1784bbf67c45b894447ede32b65b9940e78173c591e56e078932d465f235aece7ad47b7f229df7ba8f12295,
@@ -1418,6 +1443,69 @@ fn Binary(comptime op: anytype) type {
             );
         }
         fn testIntVectorTypes() !void {
+            try testArgs(@Vector(1, i8), .{
+                -0x54,
+            }, .{
+                0x0f,
+            });
+            try testArgs(@Vector(2, i8), .{
+                -0x4d, 0x55,
+            }, .{
+                0x7d, -0x5d,
+            });
+            try testArgs(@Vector(4, i8), .{
+                0x73, 0x6f, 0x6e, -0x49,
+            }, .{
+                -0x66, 0x23, 0x21, -0x56,
+            });
+            try testArgs(@Vector(8, i8), .{
+                0x44, -0x37, 0x33, -0x2b, -0x1f, 0x3e, 0x50, -0x4d,
+            }, .{
+                0x6a, 0x1a, -0x0e, 0x4c, -0x46, 0x03, -0x17, 0x3e,
+            });
+            try testArgs(@Vector(16, i8), .{
+                -0x52, 0x1a, -0x4b, 0x4e, -0x75, 0x33, -0x43, 0x30, 0x71, -0x30, -0x73, -0x53, 0x64, 0x1f, -0x27, 0x36,
+            }, .{
+                0x65, 0x77, -0x62, 0x0f, 0x15, 0x52, 0x5c, 0x12, -0x10, 0x36, 0x6d, 0x42, -0x24, -0x79, -0x32, -0x75,
+            });
+            try testArgs(@Vector(32, i8), .{
+                -0x12, -0x1e, 0x18, 0x6e, 0x31,  0x53,  -0x6a, -0x34, 0x13,  0x4d, 0x30, -0x7d, -0x31, 0x1e,  -0x24, 0x32,
+                -0x1e, -0x01, 0x55, 0x33, -0x75, -0x44, -0x57, 0x2b,  -0x66, 0x19, 0x7f, -0x28, -0x3f, -0x7e, -0x5d, -0x06,
+            }, .{
+                0x05, -0x23, 0x43,  -0x54, -0x41, 0x7f,  -0x6a, -0x31, 0x04,  0x15, -0x7a, -0x37, 0x6d, 0x16,  0x00,  0x4a,
+                0x15, 0x55,  -0x4a, 0x16,  -0x73, -0x0c, 0x1c,  -0x26, -0x14, 0x00, 0x55,  0x7b,  0x16, -0x2e, -0x5f, -0x67,
+            });
+            try testArgs(@Vector(64, i8), .{
+                -0x05, 0x76,  0x4e,  -0x5c, 0x7b,  -0x1a, -0x38, -0x2e, 0x3d,  0x36,  0x01,  0x30,  -0x02, -0x71, -0x24, 0x24,
+                -0x2e, -0x6e, -0x60, 0x74,  -0x80, -0x1c, -0x34, -0x08, -0x33, 0x77,  0x1c,  -0x0f, 0x45,  -0x51, -0x1d, 0x35,
+                -0x45, 0x44,  0x27,  -0x3c, 0x6b,  0x58,  -0x6a, -0x26, 0x06,  -0x30, -0x21, -0x0a, 0x60,  -0x11, -0x05, 0x75,
+                0x38,  0x72,  -0x6d, -0x1f, -0x7f, 0x74,  -0x6b, -0x14, -0x80, 0x35,  -0x0f, -0x1e, 0x6a,  0x17,  -0x74, -0x6c,
+            }, .{
+                -0x5d, 0x2d,  0x55,  0x40,  -0x7c, 0x67,  0x61,  0x5f,  0x14,  0x5b, -0x0c, -0x4d, -0x5f, 0x25,  0x36,  0x3c,
+                -0x75, -0x48, -0x2b, 0x76,  -0x57, -0x4a, 0x1d,  0x65,  -0x32, 0x18, -0x2a, -0x0a, -0x6e, -0x3c, -0x62, 0x4e,
+                -0x24, -0x3c, 0x7d,  -0x79, -0x1a, -0x14, -0x03, -0x56, 0x7a,  0x5f, 0x64,  -0x68, 0x5f,  -0x10, -0x63, -0x07,
+                0x79,  -0x44, 0x47,  0x7d,  0x6e,  0x77,  0x03,  -0x4e, 0x67,  0x38, 0x46,  -0x44, -0x41, 0x66,  -0x16, -0x0a,
+            });
+            try testArgs(@Vector(128, i8), .{
+                0x30,  0x70,  -0x2a, -0x29, -0x35, -0x69, -0x18, 0x2b,  0x4a,  -0x17, -0x5f, -0x36, 0x34,  -0x26, 0x03,  -0x2d,
+                -0x75, -0x27, -0x07, -0x49, -0x58, 0x00,  -0x45, 0x5d,  -0x11, -0x68, 0x34,  0x73,  -0x4d, 0x7f,  -0x25, -0x6a,
+                0x46,  -0x1d, -0x68, 0x04,  0x64,  -0x0d, 0x30,  0x27,  -0x24, 0x67,  0x3c,  -0x7c, -0x2e, -0x24, 0x24,  0x3e,
+                -0x2c, -0x05, 0x4e,  -0x17, 0x6d,  0x57,  0x76,  0x35,  -0x3d, 0x51,  0x71,  -0x4e, 0x50,  0x26,  0x4a,  -0x42,
+                0x73,  -0x36, -0x5d, 0x2a,  0x55,  0x33,  -0x2b, -0x76, 0x08,  0x43,  0x77,  -0x73, -0x0a, 0x5c,  -0x03, -0x50,
+                -0x0a, -0x1c, -0x20, 0x3c,  -0x7e, 0x60,  0x11,  -0x77, 0x25,  -0x71, 0x31,  0x2d,  -0x4b, -0x26, -0x2a, 0x7f,
+                -0x1f, 0x23,  -0x34, -0x1f, 0x35,  0x0d,  0x3e,  0x76,  -0x08, 0x2c,  0x12,  0x3e,  -0x09, -0x3e, 0x4b,  -0x52,
+                -0x1a, -0x44, -0x53, -0x41, -0x6d, -0x5e, -0x06, -0x04, 0x3f,  -0x2e, 0x01,  0x54,  0x19,  -0x5a, -0x62, -0x3a,
+            }, .{
+                0x42,  -0x11, -0x08, -0x64, -0x55, 0x31,  0x27,  -0x66, 0x38,  0x5a,  0x25,  -0x68, 0x0b,  -0x41, -0x0d, 0x60,
+                -0x17, -0x6d, 0x62,  -0x65, -0x5e, -0x1c, -0x35, 0x28,  0x1c,  -0x74, -0x7f, -0x1c, 0x3a,  0x4e,  0x05,  -0x08,
+                0x30,  -0x77, 0x03,  0x68,  -0x2c, 0x5c,  0x74,  0x6a,  -0x21, 0x0a,  0x36,  -0x55, 0x21,  0x29,  -0x05, 0x70,
+                0x23,  0x3b,  0x0a,  0x7a,  0x19,  0x14,  0x65,  -0x1d, 0x2b,  0x65,  0x33,  0x2a,  0x52,  -0x63, 0x57,  0x10,
+                -0x1b, 0x26,  -0x46, -0x7e, -0x25, 0x79,  -0x01, -0x0d, -0x49, -0x4d, 0x74,  0x03,  0x77,  0x16,  0x03,  -0x3d,
+                0x1c,  0x25,  0x5a,  -0x2f, -0x16, -0x5f, -0x36, -0x55, -0x44, -0x0c, -0x0f, 0x7b,  -0x15, -0x1d, 0x32,  0x31,
+                0x6e,  -0x44, -0x4a, -0x64, 0x67,  0x04,  0x47,  0x00,  0x3c,  -0x0a, -0x79, 0x3d,  0x48,  0x5a,  0x61,  -0x2c,
+                0x6d,  -0x68, -0x71, -0x6b, -0x11, 0x44,  -0x75, -0x55, -0x67, -0x52, 0x64,  -0x3d, -0x05, -0x76, -0x6d, -0x44,
+            });
+
             try testArgs(@Vector(1, u8), .{
                 0x1f,
             }, .{
@@ -1481,6 +1569,67 @@ fn Binary(comptime op: anytype) type {
                 0x56, 0x4f, 0xf1, 0xaa, 0x0a, 0x0f, 0xdb, 0x1b, 0xc8, 0x45, 0x9b, 0x12, 0xb4, 0x1a, 0xe4, 0xa3,
             });
 
+            try testArgs(@Vector(1, i16), .{
+                -0x7b9c,
+            }, .{
+                0x600a,
+            });
+            try testArgs(@Vector(2, i16), .{
+                0x43cc, -0x1421,
+            }, .{
+                -0x2b0e, 0x4d99,
+            });
+            try testArgs(@Vector(4, i16), .{
+                0x558f, 0x6d92, 0x488f, 0x0a04,
+            }, .{
+                -0x01a9,
+                0x2ee4,
+                0x24a9,
+                -0x5fee,
+            });
+            try testArgs(@Vector(8, i16), .{
+                -0x7e5d, -0x02e4, -0x3a72, -0x2e30, 0x7c87, 0x3ea0, 0x4f02, 0x06e4,
+            }, .{
+                -0x417f, 0x5a13, -0x117b, 0x4c28, -0x3769, -0x56a8, 0x1656, -0x4431,
+            });
+            try testArgs(@Vector(16, i16), .{
+                0x04be,  0x774a, 0x7395,  -0x6ca2, -0x21a0, 0x35be, 0x186c,  0x5991,
+                -0x1a82, 0x4527, -0x2278, -0x3554, 0x42c1,  0x7f53, -0x670d, 0x1fad,
+            }, .{
+                0x7a7d,  0x47dd,  0x1975,  0x4028, 0x26ef,  -0x24f5, -0x77c9, -0x19a5,
+                -0x4b04, -0x6939, -0x1b8d, 0x3718, -0x78e6, 0x0941,  -0x1208, -0x392d,
+            });
+            try testArgs(@Vector(32, i16), .{
+                0x4cde,  0x3ab0,  0x354e,  0x0bc0,  -0x5333, 0x4857,  -0x7ccf, -0x69da,
+                0x6ab8,  0x2bf3,  0x1c5a,  0x7b11,  -0x5653, 0x7bc5,  0x497e,  -0x0b55,
+                0x7aa8,  -0x5a8c, -0x6d05, 0x6210,  0x1b64,  0x3f6f,  0x1a02,  0x65e4,
+                -0x6795, 0x5867,  -0x6faf, -0x07cb, -0x762c, -0x7500, 0x1f1c,  -0x4348,
+            }, .{
+                0x72f6,  -0x5405, -0x3aac, 0x2857,  0x34cd,  -0x1dce, -0x56d8, 0x7150,
+                -0x6549, 0x61bd,  -0x3a9f, -0x1e02, -0x5a5a, -0x7910, -0x166d, 0x7c8e,
+                -0x5292, -0x6c6e, -0x37e3, 0x1514,  0x1787,  0x58cb,  -0x4d99, -0x6c15,
+                0x592e,  -0x045f, 0x7682,  -0x1eef, 0x1fb2,  -0x7117, -0x2a17, -0x2d8e,
+            });
+            try testArgs(@Vector(64, i16), .{
+                0x29c3,  -0x1b1f, -0x17ce, -0x50d0, -0x5de3, 0x5ffd,  0x184a,  -0x7769,
+                0x445e,  0x0d8a,  0x7844,  -0x757d, 0x2b32,  0x5374,  -0x6ab2, -0x71c4,
+                0x38f9,  0x347f,  0x2d4c,  0x69a4,  -0x2f92, -0x4479, 0x427b,  -0x0c5f,
+                0x15ae,  0x2c86,  0x1864,  -0x0095, 0x6803,  -0x3484, 0x1001,  -0x0560,
+                -0x0824, 0x7bf6,  0x7a3c,  -0x458a, -0x65cc, -0x54b1, -0x75c6, 0x782e,
+                0x35a7,  -0x3188, -0x58ba, 0x40d0,  -0x4a9c, 0x6b79,  0x1ef5,  0x67a2,
+                -0x3fb8, 0x1885,  -0x093d, -0x4802, 0x0379,  0x2f52,  0x7f1f,  0x256c,
+                0x1052,  0x1b3b,  -0x6146, 0x7e0d,  0x79ca,  -0x79ee, 0x3d58,  0x7482,
+            }, .{
+                -0x0017, -0x3fdd, -0x6f93, 0x6178,  0x5c2b,  0x4eb3,  0x685b,  0x12c8,
+                0x0290,  -0x34f4, -0x6572, 0x3ab6,  -0x3ed1, -0x5e5f, 0x3a90,  -0x4540,
+                -0x2098, 0x6bde,  0x1246,  0x2212,  -0x4d6a, -0x2a5a, 0x5cc4,  -0x240f,
+                0x51b2,  0x5ec0,  -0x5b5f, -0x1b6e, -0x57a5, -0x06bd, -0x5132, 0x7889,
+                0x2817,  0x6ada,  -0x6b46, -0x6a37, -0x6475, -0x5ff4, 0x5a27,  0x1dfa,
+                0x6bd6,  -0x49da, -0x09bf, -0x7c53, 0x2cd3,  -0x6be0, -0x2dca, 0x44bd,
+                -0x1b95, 0x7680,  -0x5bb0, 0x7ad7,  -0x1988, 0x149f,  0x631e,  -0x1d2d,
+                0x632b,  0x55c7,  -0x3433, 0x0dde,  -0x27a7, 0x560e,  -0x2063, 0x4570,
+            });
+
             try testArgs(@Vector(1, u16), .{
                 0x9d6f,
             }, .{
@@ -1539,6 +1688,59 @@ fn Binary(comptime op: anytype) type {
                 0x2c02, 0xff5b, 0x19ca, 0xbbf5, 0x870e, 0xc9ca, 0x47bb, 0xcfcc,
             });
 
+            try testArgs(@Vector(1, i32), .{
+                0x7aef7b1e,
+            }, .{
+                0x60310858,
+            });
+            try testArgs(@Vector(2, i32), .{
+                -0x21910ac9, 0x669f37ef,
+            }, .{
+                0x1a2a1681, 0x003b1fdf,
+            });
+            try testArgs(@Vector(4, i32), .{
+                0x7906cf0d, 0x4818a45f, -0x0a2833b6, 0x51a018c9,
+            }, .{
+                -0x05a3e6a7, -0x47f4a500, 0x50d1141f, -0x264c85c2,
+            });
+            try testArgs(@Vector(8, i32), .{
+                0x7566235a,  -0x7720144f, -0x7d4f5489, 0x3cd736c8,
+                -0x77388801, 0x4e7f955a,  0x4cdf52bc,  0x50b0b53f,
+            }, .{
+                0x00ed6fc5, 0x37320361, 0x70c563c2,  -0x09acb495,
+                0x0688e83f, 0x797295c4, -0x23bfbfdb, 0x38552096,
+            });
+            try testArgs(@Vector(16, i32), .{
+                -0x0214589d, 0x74a7537f,  0x7a7dcb26, 0x3e2e4c44,
+                -0x23bfc358, 0x60e8ef18,  0x5524a7bc, -0x3d88c153,
+                -0x7dc8ff0f, 0x6e2698f6,  0x05641ab8, -0x45e9e405,
+                -0x7c1a04d0, -0x4a8d1e91, 0x41d56723, 0x4ba924ab,
+            }, .{
+                -0x528dc756, -0x6bc217f4, 0x40789b06, 0x65f08d3a,
+                -0x077140ea, -0x43bdaa79, 0x5d98f4e7, -0x2356a1ca,
+                -0x36ef2b49, -0x7cd09b06, 0x71c8176e, 0x5b005860,
+                0x6ce8cfab,  -0x49fd7609, 0x6cbb4e33, 0x6c7c121d,
+            });
+            try testArgs(@Vector(32, i32), .{
+                0x7d22905d,  -0x354e4bbe, -0x68662618, -0x246e1858,
+                -0x1c4285a9, -0x0338059c, -0x60f5bbf4, -0x04f06917,
+                -0x55f837b6, -0x2fba5fe3, 0x092aabf4,  -0x5f533b31,
+                0x6e81a558,  -0x7bcac358, 0x6c4d8d04,  0x3e2f9852,
+                -0x78589b1a, -0x68a00fd4, -0x77d55e25, 0x7f79b51c,
+                -0x66b88f45, 0x7f6dc8a5,  -0x27299a82, -0x426c8e1c,
+                0x0c288f16,  0x158f8c3f,  0x26708be1,  -0x0b73626e,
+                -0x32df1bee, 0x196330f4,  -0x68bb9529, -0x26376ab6,
+            }, .{
+                0x63bd0bd4,  0x4e507611,  -0x5e5222b8, -0x35d8e114,
+                0x1feab77b,  -0x20de7dfd, -0x0ed0b09f, -0x7fc3d585,
+                -0x2d3018e9, -0x261d431b, 0x54451864,  0x1415288f,
+                -0x3ab89593, -0x7060e4c1, -0x54fcd501, -0x26324630,
+                0x53fc8294,  0x2d4aceef,  -0x4ac8efd2, -0x2fec97b7,
+                -0x4de3a2fc, 0x2269fe52,  -0x58c8b473, -0x21026285,
+                -0x23438776, 0x3d5c8c41,  -0x1fc946b2, -0x161c7005,
+                0x44913ff1,  -0x76e2bfaa, -0x54636350, -0x6ec53870,
+            });
+
             try testArgs(@Vector(1, u32), .{
                 0x1d0d9cc4,
             }, .{
@@ -1592,6 +1794,54 @@ fn Binary(comptime op: anytype) type {
                 0xf080e943, 0xc8718d14, 0x3f920382, 0x18d101b5,
             });
 
+            try testArgs(@Vector(1, i64), .{
+                0x4a31679b316d8b59,
+            }, .{
+                0x34a583368386afde,
+            });
+            try testArgs(@Vector(2, i64), .{
+                0x3bae373f9cb990b3, -0x7e8c6c876e8fd34a,
+            }, .{
+                0x09dbef6f7cb9c726, 0x48dfeca879b0df51,
+            });
+            try testArgs(@Vector(4, i64), .{
+                -0x2bd24dd5f5da94bf, -0x144113bae33082c2,
+                0x51e8cb7027ba4b12,  -0x47b02168e2e22f13,
+            }, .{
+                0x769f113245641b91,  -0x414d0e24ea97bc53,
+                -0x0d2a570e7ef9e923, -0x070513d46d3b5a4c,
+            });
+            try testArgs(@Vector(8, i64), .{
+                0x10bb6779b6a55ca9,  0x5f6ffd567a187af4,
+                -0x6ba191b1168486b4, -0x441b92ce455870a1,
+                0x2b6fdefbec9386ad,  -0x6fdd3938d79217e4,
+                0x6aa8fe1fb891501f,  0x20802f5bbdf6dc50,
+            }, .{
+                -0x7500319df437b479, 0x00ceb712d4fa62d4,
+                0x67e715b9e99e660d,  -0x17ae00e1f0009ec2,
+                -0x5b700b948503acdf, -0x3ff61fb5cce5a530,
+                0x55a3efac2e3694a4,  0x7f951a8d842f1670,
+            });
+            try testArgs(@Vector(16, i64), .{
+                0x37a205109a685810,  -0x50ff5d13134ccaa6,
+                0x26813391c5505d5d,  -0x502cdc01603a2f21,
+                -0x6b1b44b1c850c7ea, 0x1f6db974ace9dd70,
+                -0x47d15da8b519e328, 0x3ac0763abbf79d8d,
+                0x5f12e0dc1aed4a4f,  -0x46a973e16061e928,
+                -0x3f59a3fa9699b4d5, -0x2f5012d390c78315,
+                -0x40e510dea2c47e9c, 0x221c51defe0acc9a,
+                -0x385fd6f1d390b84b, 0x35932fe2783fa6b9,
+            }, .{
+                0x0ba5202b71ad73dd,  0x65c8d2d5e2a14fe5,
+                0x2e4d97cd66c41a3d,  0x14babbb47da51193,
+                0x59d1d12b42ade3aa,  -0x3c3617e556dfa8fb,
+                -0x5a36602ba43279c4, -0x61f1ddda13665d9f,
+                -0x50cd6128589ddd04, 0x135ae0dcc85674ae,
+                -0x25e80592affc038d, 0x07e184c44fbe9b12,
+                -0x70ede1b90964bbaa, 0x3ec48b32e8efd98e,
+                -0x5267d41d85a29f46, 0x53099805f9116b60,
+            });
+
             try testArgs(@Vector(1, u64), .{
                 0x333f593bf9d08546,
             }, .{
@@ -1640,6 +1890,49 @@ fn Binary(comptime op: anytype) type {
                 0xed533d18f8657f3f, 0x1ddd7cd7f6bab957,
             });
 
+            try testArgs(@Vector(1, i128), .{
+                -0x3bb56309fcad13fc1011dc671cf57bdc,
+            }, .{
+                -0x05338bb517db516ee08c45d1408e5836,
+            });
+            try testArgs(@Vector(2, i128), .{
+                0x295f2901e3837e5592b9435f8c4df8a7,
+                -0x1f246b0ff2d02a6bf30a63392fc63371,
+            }, .{
+                -0x31060c09e29b545670c4cbc721a4e26b,
+                -0x631eb286321325d51c617aa798195392,
+            });
+            try testArgs(@Vector(4, i128), .{
+                0x47110102c74f620f08e5b7c5dbe193c2,
+                -0x61d12d2650413ad3ffeeeab3ba57e1f0,
+                0x449781e64b29dc8a17a88f4b7a5b0717,
+                0x0d2170e9238d12a585dc5377566e1938,
+            }, .{
+                0x0bf948e19bd01823dcb3887937d97079,
+                -0x16f933ab12bfba3560d0d39ffe69b64a,
+                0x3d0bfce3907a5cd157348f0329e2548e,
+                -0x3c2d182e2e238a4bebd7defbd7f9699a,
+            });
+            try testArgs(@Vector(8, i128), .{
+                -0x775678727c721662f02480619acbfc82,
+                -0x6f504fcbff673cb91e4706af4373665f,
+                -0x670f888d4186387c3106d125b856c294,
+                0x0641e7efdfdd924d126b446d874154f8,
+                0x57d7aef0f82d3351917f43c8f677392b,
+                -0x4077e745dede8367d145c94f20ab8810,
+                -0x0344a74fb60e1f1f72ba8ec288b05939,
+                -0x0be3ce9be461aca1d25ad8e74dcc36e1,
+            }, .{
+                -0x4a873d91e5a2331def0d34c008d33d83,
+                0x2744cecfd4c683bdd12f3cfc11d7f520,
+                -0x0cb8e468fc1de93a7c5ad2a5a61e8f50,
+                -0x1a3be9e58e918d6586cc4948a54515d3,
+                -0x512ec6f88c3a34950a8aaee47130120b,
+                -0x2e772e4a8812e553bcf9b2754a493709,
+                0x0c7b137937dc25f9f9cbaf4d7a88ee6b,
+                -0x2ecdd5eb81eb0e98ed8d0aa9516c1617,
+            });
+
             try testArgs(@Vector(1, u128), .{
                 0x5f11e16b0ca3392f907a857881455d2e,
             }, .{
@@ -1683,6 +1976,30 @@ fn Binary(comptime op: anytype) type {
                 0xa858bce5ad0e48c13588a4e170e8667c,
             });
 
+            try testArgs(@Vector(1, i256), .{
+                0x1fe30aed39db1accf4d1b43845aec28c1094b500492555fdf59b4f2f85c6a1ce,
+            }, .{
+                0x6932f4faf261c45ecd701a4fe3015d4255e486b04c4ab448fe162980cead63fb,
+            });
+            try testArgs(@Vector(2, i256), .{
+                -0x23daa9bab59dc1e685f4220c189930c3420a55784f0dec1028c2778d907ccfe2,
+                0x521c992e4f46d61709d39e076ed94d5d884585f85ccbf71ca4d593da34f61bf5,
+            }, .{
+                0x2d880cb5aa793218a32411389db31e935932029645573a9625dd174099c9e5b2,
+                0x2394a6cde7e8b2dc2995f07f22f815baa6c223d99c0b1ec4b2d8abd0094db853,
+            });
+            try testArgs(@Vector(4, i256), .{
+                0x244e66ed932a4d970fd8735c10bfbd5f59bd4452c20fa0fcf873823b8c9e6321,
+                -0x31577b747614b1ab83fd0178293cd80b3cb92e739459b2d038688a2471f6d659,
+                -0x0dbdfc3d8bbd7cab6a33598cef29125aab7571fb0db9a528e42966963d6ce0e7,
+                -0x72c58cce172d8a34019a44407a4baf1f8f8a4a611711bd5bb4daa2a2739dd67b,
+            }, .{
+                -0x2e88bc68893fc2d61af0e5ccb541f31fa6169504e8cfcbeab0b74a03b9e86c33,
+                -0x7eba0783f3382b59a17ffbea57ba1dd8fa30e2d4f7eba7ed68d336d3c37b4561,
+                -0x66d1463efd38e9e994e126d09b5c65c8efc932ffea9ec6cdf6042561ba05f801,
+                0x2024bbacefbabbfd5b32a09be631451764a1f889a77918f9094382dc6d02aef2,
+            });
+
             try testArgs(@Vector(1, u256), .{
                 0x28df37e1f57a56133ba3f5b5b2164ce24eb6c29a8973a597fd91fbee8ab4bafb,
             }, .{
@@ -1707,6 +2024,19 @@ fn Binary(comptime op: anytype) type {
                 0xe47a122bd45d5e7d69722d864a6b795ddee965a0993094f8791dd309d692de8b,
             });
 
+            try testArgs(@Vector(1, i512), .{
+                -0x439ba81b44584e0c4d7abc80d18ab9d679a4e921884e877b28d04eb15b2d3e7be8d670b0aba2c4cc25c12655e1899ab514d0a6e50a221bcf076d506e6411d5c2,
+            }, .{
+                0x18b1d3be5a03310d82859a4ab72f056a33d1a4b554522bcc062fb33eda3b8111045ee79e045dd1a665d250b897f6f2e12003a03313c2547698f8c1eab452eae1,
+            });
+            try testArgs(@Vector(2, i512), .{
+                0x28e2ab84d87d5fb12be65d8650de67b992dd162fe563ca74b62f51f2f32e1084e03e32c8370930816445ac5052b4d345059c8ace582e3ef44377b160e265ec9b,
+                -0x3a96548c707219326c42063997e71bc7a17b3067d402063843f84c86e747b71e09338079c28943d20601c0cde018bad57f5615fc89784bcb6232e45c54dff1db,
+            }, .{
+                0x64beecc90609b7156653b75a861e174c58fb42d5c7bf8d793efbb1cbe785c6b8cd52ce5f9aa859f174123c387820d40a2f93122b81396d739eb85c3ea33fcd37,
+                -0x3632e347bc6d794940424ca0945dafa04328a924ec6b0ccdedcda6d296e09aa2dd5dca83b934cac752993238aa4fe826be8d62991c9347bae6f01bc0b1b4223d,
+            });
+
             try testArgs(@Vector(1, u512), .{
                 0x651058c1d89a8f34cfc5e66b6d25294eecfcc4a7e1e4a356eb51ee7d7b2db25378e4afee51b7d18d16e520772a60c50a02d7966f40ced1870b32c658e5821397,
             }, .{
@@ -1720,6 +2050,12 @@ fn Binary(comptime op: anytype) type {
                 0x8fa3d86099e9e2789d72f8e792290356d659ab20ac0414ff94745984c6ae7d986082197bb849889f912e896670aa2c1a11bd7e66e3f650710b0f0a18a1533f90,
             });
 
+            try testArgs(@Vector(1, i1024), .{
+                -0x4fe568569c0531c9bfbbda1516e93a6c61a3d035c98e13fdc85225165a3bea84d5dc6b610ced008f9321453af42ea50bbf6881d40d2759b73b9b6186c0d6d243f367e292cbbf6b5c5c30d7f4e8de19701c7b0fc9e67cdf31228daa1675a4887f6c4f1588b48855d6f4730a21f27dec8a756c568727709b65cd531020d53ff394,
+            }, .{
+                -0x7cab2a053dfbf944cd342460350c989fd1b4469a6c7b54ddcacd54e605d29c03651b5c463495610d82269c9ac5b51bfd07816a0f7b1ab50cb598989ed64607b3faff79a190702eb285b0fedc050ec1a71537abc47ec590eb671d4f76b19567049ba4789d1a4348385607a0320fbff9b78260536a9b6030bddb0b09da689d1687,
+            });
+
             try testArgs(@Vector(1, u1024), .{
                 0x0ca1a0dfaf8bb1da714b457d23c71aef948e66c7cd45c0aa941498a796fb18502ec32f34e885d0a107d44ae81595f8b52c2f0fb38e584b7139903a0e8a823ae20d01ca0662722dd474e7efc40f32d74cc065d97d8a09d0447f1ab6107fa0a57f3f8c866ae872506627ce82f18add79cee8dc69837f4ead3ca770c4d622d7e544,
             }, .{
@@ -1778,3 +2114,19 @@ test bitXor {
     try Binary(bitXor).testIntTypes();
     try Binary(bitXor).testIntVectorTypes();
 }
+
+inline fn min(comptime Type: type, lhs: Type, rhs: Type) Type {
+    return @min(lhs, rhs);
+}
+test min {
+    try Binary(min).testIntTypes();
+    try Binary(min).testIntVectorTypes();
+}
+
+inline fn max(comptime Type: type, lhs: Type, rhs: Type) Type {
+    return @max(lhs, rhs);
+}
+test max {
+    try Binary(max).testIntTypes();
+    try Binary(max).testIntVectorTypes();
+}
test/behavior/maximum_minimum.zig
@@ -33,8 +33,7 @@ test "@max on vectors" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_x86_64 and
-        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTest() !void {
@@ -89,8 +88,7 @@ test "@min for vectors" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_x86_64 and
-        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTest() !void {
@@ -304,7 +302,7 @@ test "@min/@max notices bounds from vector types when element of comptime-known
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64 and
-        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .avx)) return error.SkipZigTest;
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest;
 
     var x: @Vector(2, u32) = .{ 1_000_000, 12345 };
     _ = &x;
test/cases/compile_errors/anytype_param_requires_comptime.zig
@@ -15,6 +15,6 @@ pub export fn entry() void {
 // error
 //
 // :7:25: error: unable to resolve comptime value
-// :7:25: note: initializer of comptime-only struct 'tmp.S.foo__anon_447.C' must be comptime-known
+// :7:25: note: initializer of comptime-only struct 'tmp.S.foo__anon_463.C' must be comptime-known
 // :4:16: note: struct requires comptime because of this field
 // :4:16: note: types are not available at runtime
test/cases/compile_errors/bogus_method_call_on_slice.zig
@@ -16,5 +16,5 @@ pub export fn entry2() void {
 //
 // :3:6: error: no field or member function named 'copy' in '[]const u8'
 // :9:8: error: no field or member function named 'bar' in '@TypeOf(.{})'
-// :12:18: error: no field or member function named 'bar' in 'tmp.entry2__struct_451'
+// :12:18: error: no field or member function named 'bar' in 'tmp.entry2__struct_467'
 // :12:6: note: struct declared here
test/cases/compile_errors/coerce_anon_struct.zig
@@ -6,6 +6,6 @@ export fn foo() void {
 
 // error
 //
-// :4:16: error: expected type 'tmp.T', found 'tmp.foo__struct_440'
+// :4:16: error: expected type 'tmp.T', found 'tmp.foo__struct_456'
 // :3:16: note: struct declared here
 // :1:11: note: struct declared here