Commit cea9ac772a

Jacob Young <jacobly0@users.noreply.github.com>
2023-05-15 08:55:41
x86_64: implement integer vector min/max
1 parent 40457a3
Changed files (4)
src/arch/x86_64/CodeGen.zig
@@ -6534,6 +6534,34 @@ fn genBinOp(
                         .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
                         .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
                         .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
+                        .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+                            .signed => if (self.hasFeature(.avx))
+                                .{ .vp_b, .mins }
+                            else if (self.hasFeature(.sse4_1))
+                                .{ .p_b, .mins }
+                            else
+                                null,
+                            .unsigned => if (self.hasFeature(.avx))
+                                .{ .vp_b, .minu }
+                            else if (self.hasFeature(.sse4_1))
+                                .{ .p_b, .minu }
+                            else
+                                null,
+                        },
+                        .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+                            .signed => if (self.hasFeature(.avx))
+                                .{ .vp_b, .maxs }
+                            else if (self.hasFeature(.sse4_1))
+                                .{ .p_b, .maxs }
+                            else
+                                null,
+                            .unsigned => if (self.hasFeature(.avx))
+                                .{ .vp_b, .maxu }
+                            else if (self.hasFeature(.sse4_1))
+                                .{ .p_b, .maxu }
+                            else
+                                null,
+                        },
                         else => null,
                     },
                     17...32 => switch (air_tag) {
@@ -6546,6 +6574,14 @@ fn genBinOp(
                         .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
                         .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
                         .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
+                        .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+                            .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .mins } else null,
+                            .unsigned => if (self.hasFeature(.avx)) .{ .vp_b, .minu } else null,
+                        },
+                        .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+                            .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .maxs } else null,
+                            .unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .maxu } else null,
+                        },
                         else => null,
                     },
                     else => null,
@@ -6564,6 +6600,26 @@ fn genBinOp(
                         .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
                         .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
                         .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
+                        .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+                            .signed => if (self.hasFeature(.avx))
+                                .{ .vp_w, .mins }
+                            else
+                                .{ .p_w, .mins },
+                            .unsigned => if (self.hasFeature(.avx))
+                                .{ .vp_w, .minu }
+                            else
+                                .{ .p_w, .minu },
+                        },
+                        .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+                            .signed => if (self.hasFeature(.avx))
+                                .{ .vp_w, .maxs }
+                            else
+                                .{ .p_w, .maxs },
+                            .unsigned => if (self.hasFeature(.avx))
+                                .{ .vp_w, .maxu }
+                            else
+                                .{ .p_w, .maxu },
+                        },
                         else => null,
                     },
                     9...16 => switch (air_tag) {
@@ -6579,6 +6635,14 @@ fn genBinOp(
                         .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
                         .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
                         .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
+                        .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+                            .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .mins } else null,
+                            .unsigned => if (self.hasFeature(.avx)) .{ .vp_w, .minu } else null,
+                        },
+                        .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+                            .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .maxs } else null,
+                            .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .maxu } else null,
+                        },
                         else => null,
                     },
                     else => null,
@@ -6602,6 +6666,34 @@ fn genBinOp(
                         .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
                         .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
                         .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
+                        .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+                            .signed => if (self.hasFeature(.avx))
+                                .{ .vp_d, .mins }
+                            else if (self.hasFeature(.sse4_1))
+                                .{ .p_d, .mins }
+                            else
+                                null,
+                            .unsigned => if (self.hasFeature(.avx))
+                                .{ .vp_d, .minu }
+                            else if (self.hasFeature(.sse4_1))
+                                .{ .p_d, .minu }
+                            else
+                                null,
+                        },
+                        .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+                            .signed => if (self.hasFeature(.avx))
+                                .{ .vp_d, .maxs }
+                            else if (self.hasFeature(.sse4_1))
+                                .{ .p_d, .maxs }
+                            else
+                                null,
+                            .unsigned => if (self.hasFeature(.avx))
+                                .{ .vp_d, .maxu }
+                            else if (self.hasFeature(.sse4_1))
+                                .{ .p_d, .maxu }
+                            else
+                                null,
+                        },
                         else => null,
                     },
                     5...8 => switch (air_tag) {
@@ -6617,6 +6709,14 @@ fn genBinOp(
                         .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
                         .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
                         .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
+                        .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+                            .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .mins } else null,
+                            .unsigned => if (self.hasFeature(.avx)) .{ .vp_d, .minu } else null,
+                        },
+                        .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+                            .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .maxs } else null,
+                            .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .maxu } else null,
+                        },
                         else => null,
                     },
                     else => null,
src/arch/x86_64/Encoding.zig
@@ -280,6 +280,7 @@ pub const Mnemonic = enum {
     mulps, mulss,
     orps,
     pextrw, pinsrw,
+    pmaxsw, pmaxub, pminsw, pminub,
     shufps,
     sqrtps, sqrtss,
     subps, subss,
@@ -318,6 +319,7 @@ pub const Mnemonic = enum {
     insertps,
     pextrb, pextrd, pextrq,
     pinsrb, pinsrd, pinsrq,
+    pmaxsb, pmaxsd, pmaxud, pmaxuw, pminsb, pminsd, pminud, pminuw,
     pmulld,
     roundpd, roundps, roundsd, roundss,
     // AVX
@@ -349,6 +351,8 @@ pub const Mnemonic = enum {
     vpand, vpandn,
     vpextrb, vpextrd, vpextrq, vpextrw,
     vpinsrb, vpinsrd, vpinsrq, vpinsrw,
+    vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw,
+    vpminsb, vpminsd, vpminsw, vpminub, vpminud, vpminuw,
     vpmulhw, vpmulld, vpmullw,
     vpor,
     vpshufhw, vpshuflw,
src/arch/x86_64/encodings.zig
@@ -1011,6 +1011,14 @@ pub const table = [_]Entry{
 
     .{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 },
 
+    .{ .pmaxsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xee }, 0, .none, .sse2 },
+
+    .{ .pmaxub, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xde }, 0, .none, .sse2 },
+
+    .{ .pminsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xea }, 0, .none, .sse2 },
+
+    .{ .pminub, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xda }, 0, .none, .sse2 },
+
     .{ .pmulhw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .none, .sse2 },
 
     .{ .pmullw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .none, .sse2 },
@@ -1091,6 +1099,20 @@ pub const table = [_]Entry{
     .{ .pinsrd, .rmi, &.{ .xmm, .rm32,   .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .none, .sse4_1 },
     .{ .pinsrq, .rmi, &.{ .xmm, .rm64,   .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .long, .sse4_1 },
 
+    .{ .pmaxsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .none, .sse4_1 },
+    .{ .pmaxsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .none, .sse4_1 },
+
+    .{ .pmaxuw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .none, .sse4_1 },
+
+    .{ .pmaxud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .none, .sse4_1 },
+
+    .{ .pminsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .none, .sse4_1 },
+    .{ .pminsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .none, .sse4_1 },
+
+    .{ .pminuw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .none, .sse4_1 },
+
+    .{ .pminud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .none, .sse4_1 },
+
     .{ .pmulld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .none, .sse4_1 },
 
     .{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 },
@@ -1318,6 +1340,24 @@ pub const table = [_]Entry{
 
     .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx },
 
+    .{ .vpmaxsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_128_wig, .avx },
+    .{ .vpmaxsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f,       0xee }, 0, .vex_128_wig, .avx },
+    .{ .vpmaxsd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_128_wig, .avx },
+
+    .{ .vpmaxub, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f,       0xde }, 0, .vex_128_wig, .avx },
+    .{ .vpmaxuw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_128_wig, .avx },
+
+    .{ .vpmaxud, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_128_wig, .avx },
+
+    .{ .vpminsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_128_wig, .avx },
+    .{ .vpminsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f,       0xea }, 0, .vex_128_wig, .avx },
+    .{ .vpminsd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_128_wig, .avx },
+
+    .{ .vpminub, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f,       0xda }, 0, .vex_128_wig, .avx },
+    .{ .vpminuw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_128_wig, .avx },
+
+    .{ .vpminud, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_128_wig, .avx },
+
     .{ .vpmulhw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx },
 
     .{ .vpmulld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx },
@@ -1449,6 +1489,24 @@ pub const table = [_]Entry{
 
     .{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
 
+    .{ .vpmaxsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_256_wig, .avx },
+    .{ .vpmaxsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f,       0xee }, 0, .vex_256_wig, .avx },
+    .{ .vpmaxsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_256_wig, .avx },
+
+    .{ .vpmaxub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f,       0xde }, 0, .vex_256_wig, .avx },
+    .{ .vpmaxuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_256_wig, .avx },
+
+    .{ .vpmaxud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_256_wig, .avx },
+
+    .{ .vpminsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_256_wig, .avx },
+    .{ .vpminsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f,       0xea }, 0, .vex_256_wig, .avx },
+    .{ .vpminsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_256_wig, .avx },
+
+    .{ .vpminub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f,       0xda }, 0, .vex_256_wig, .avx },
+    .{ .vpminuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_256_wig, .avx },
+
+    .{ .vpminud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_256_wig, .avx },
+
     .{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx },
 
     .{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx },
src/arch/x86_64/Mir.zig
@@ -453,6 +453,14 @@ pub const Inst = struct {
         /// Bitwise logical and not of packed single-precision floating-point values
         /// Bitwise logical and not of packed double-precision floating-point values
         andn,
+        /// Maximum of packed signed integers
+        maxs,
+        /// Maximum of packed unsigned integers
+        maxu,
+        /// Minimum of packed signed integers
+        mins,
+        /// Minimum of packed unsigned integers
+        minu,
         /// Multiply packed signed integers and store low result
         mull,
         /// Multiply packed signed integers and store high result