Commit cea9ac772a
Changed files (4)
src
arch
src/arch/x86_64/CodeGen.zig
@@ -6534,6 +6534,34 @@ fn genBinOp(
.bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
.bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
.xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
+ // Byte elements in a 128-bit vector. The unsigned forms (pminub/pmaxub) are
+ // tagged .sse2 in encodings.zig, so they get the same unconditional legacy
+ // fallback as pand/por/pxor above; only the signed forms (pminsb/pmaxsb)
+ // require SSE4.1. `null` means "no single instruction available".
+ .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+     .signed => if (self.hasFeature(.avx))
+         .{ .vp_b, .mins }
+     else if (self.hasFeature(.sse4_1))
+         .{ .p_b, .mins }
+     else
+         null,
+     .unsigned => if (self.hasFeature(.avx))
+         .{ .vp_b, .minu }
+     else
+         .{ .p_b, .minu },
+ },
+ .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+     .signed => if (self.hasFeature(.avx))
+         .{ .vp_b, .maxs }
+     else if (self.hasFeature(.sse4_1))
+         .{ .p_b, .maxs }
+     else
+         null,
+     .unsigned => if (self.hasFeature(.avx))
+         .{ .vp_b, .maxu }
+     else
+         .{ .p_b, .maxu },
+ },
else => null,
},
17...32 => switch (air_tag) {
@@ -6546,6 +6574,14 @@ fn genBinOp(
.bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
.bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
.xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
+ // Byte elements in a 256-bit vector. Every ymm integer min/max form needs
+ // AVX2 regardless of signedness; plain AVX only covers the xmm forms, so
+ // all four arms gate on .avx2 (matching the bitwise prongs above).
+ .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+     .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .mins } else null,
+     .unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .minu } else null,
+ },
+ .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+     .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .maxs } else null,
+     .unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .maxu } else null,
+ },
else => null,
},
else => null,
@@ -6564,6 +6600,26 @@ fn genBinOp(
.bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
.bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
.xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
+ // Word elements in a 128-bit vector. The signed forms (pminsw/pmaxsw) are
+ // tagged .sse2 in encodings.zig and can always be used as the legacy
+ // fallback. The unsigned forms (pminuw/pmaxuw) are tagged .sse4_1, so
+ // without AVX they must be gated on SSE4.1; otherwise report `null`
+ // instead of selecting an instruction the target cannot encode.
+ .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+     .signed => if (self.hasFeature(.avx))
+         .{ .vp_w, .mins }
+     else
+         .{ .p_w, .mins },
+     .unsigned => if (self.hasFeature(.avx))
+         .{ .vp_w, .minu }
+     else if (self.hasFeature(.sse4_1))
+         .{ .p_w, .minu }
+     else
+         null,
+ },
+ .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+     .signed => if (self.hasFeature(.avx))
+         .{ .vp_w, .maxs }
+     else
+         .{ .p_w, .maxs },
+     .unsigned => if (self.hasFeature(.avx))
+         .{ .vp_w, .maxu }
+     else if (self.hasFeature(.sse4_1))
+         .{ .p_w, .maxu }
+     else
+         null,
+ },
else => null,
},
9...16 => switch (air_tag) {
@@ -6579,6 +6635,14 @@ fn genBinOp(
.bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
.bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
.xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
+ // Word elements in a 256-bit vector. Every ymm integer min/max form needs
+ // AVX2 regardless of signedness; plain AVX only covers the xmm forms, so
+ // all four arms gate on .avx2 (matching the bitwise prongs above).
+ .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+     .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .mins } else null,
+     .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .minu } else null,
+ },
+ .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+     .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .maxs } else null,
+     .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .maxu } else null,
+ },
else => null,
},
else => null,
@@ -6602,6 +6666,34 @@ fn genBinOp(
.bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
.bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
.xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
+ // Dword elements in a 128-bit vector. Pick the encoding family first (VEX
+ // when AVX is present, legacy SSE4.1 otherwise), then the signed/unsigned
+ // variant; `null` when neither feature is available.
+ .min => if (self.hasFeature(.avx))
+     switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+         .signed => .{ .vp_d, .mins },
+         .unsigned => .{ .vp_d, .minu },
+     }
+ else if (self.hasFeature(.sse4_1))
+     switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+         .signed => .{ .p_d, .mins },
+         .unsigned => .{ .p_d, .minu },
+     }
+ else
+     null,
+ .max => if (self.hasFeature(.avx))
+     switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+         .signed => .{ .vp_d, .maxs },
+         .unsigned => .{ .vp_d, .maxu },
+     }
+ else if (self.hasFeature(.sse4_1))
+     switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+         .signed => .{ .p_d, .maxs },
+         .unsigned => .{ .p_d, .maxu },
+     }
+ else
+     null,
else => null,
},
5...8 => switch (air_tag) {
@@ -6617,6 +6709,14 @@ fn genBinOp(
.bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
.bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
.xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
+ // Dword elements in a 256-bit vector. Every ymm integer min/max form needs
+ // AVX2 regardless of signedness; plain AVX only covers the xmm forms, so
+ // all four arms gate on .avx2 (matching the bitwise prongs above).
+ .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+     .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .mins } else null,
+     .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .minu } else null,
+ },
+ .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+     .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .maxs } else null,
+     .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .maxu } else null,
+ },
else => null,
},
else => null,
src/arch/x86_64/Encoding.zig
@@ -280,6 +280,7 @@ pub const Mnemonic = enum {
mulps, mulss,
orps,
pextrw, pinsrw,
+ pmaxsw, pmaxub, pminsw, pminub, // packed integer max/min (signed word, unsigned byte); xmm forms are tagged .sse2 in encodings.zig
shufps,
sqrtps, sqrtss,
subps, subss,
@@ -318,6 +319,7 @@ pub const Mnemonic = enum {
insertps,
pextrb, pextrd, pextrq,
pinsrb, pinsrd, pinsrq,
+ pmaxsb, pmaxsd, pmaxud, pmaxuw, pminsb, pminsd, pminud, pminuw, // remaining packed integer max/min widths; tagged .sse4_1 in encodings.zig
pmulld,
roundpd, roundps, roundsd, roundss,
// AVX
@@ -349,6 +351,8 @@ pub const Mnemonic = enum {
vpand, vpandn,
vpextrb, vpextrd, vpextrq, vpextrw,
vpinsrb, vpinsrd, vpinsrq, vpinsrw,
+ vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw, // VEX-encoded packed integer maximum, signed/unsigned x byte/word/dword
+ vpminsb, vpminsd, vpminsw, vpminub, vpminud, vpminuw, // VEX-encoded packed integer minimum, signed/unsigned x byte/word/dword
vpmulhw, vpmulld, vpmullw,
vpor,
vpshufhw, vpshuflw,
src/arch/x86_64/encodings.zig
@@ -1011,6 +1011,14 @@ pub const table = [_]Entry{
.{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 },
+ .{ .pmaxsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xee }, 0, .none, .sse2 }, // packed maximum, signed words
+
+ .{ .pmaxub, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xde }, 0, .none, .sse2 }, // packed maximum, unsigned bytes
+
+ .{ .pminsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xea }, 0, .none, .sse2 }, // packed minimum, signed words
+
+ .{ .pminub, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xda }, 0, .none, .sse2 }, // packed minimum, unsigned bytes
+
.{ .pmulhw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .none, .sse2 },
.{ .pmullw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .none, .sse2 },
@@ -1091,6 +1099,20 @@ pub const table = [_]Entry{
.{ .pinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .none, .sse4_1 },
.{ .pinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .long, .sse4_1 },
+ .{ .pmaxsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .none, .sse4_1 }, // packed maximum, signed bytes
+ .{ .pmaxsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .none, .sse4_1 }, // packed maximum, signed dwords
+
+ .{ .pmaxuw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .none, .sse4_1 }, // packed maximum, unsigned words
+
+ .{ .pmaxud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .none, .sse4_1 }, // packed maximum, unsigned dwords
+
+ .{ .pminsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .none, .sse4_1 }, // packed minimum, signed bytes
+ .{ .pminsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .none, .sse4_1 }, // packed minimum, signed dwords
+
+ .{ .pminuw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .none, .sse4_1 }, // packed minimum, unsigned words
+
+ .{ .pminud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .none, .sse4_1 }, // packed minimum, unsigned dwords
+
.{ .pmulld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .none, .sse4_1 },
.{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 },
@@ -1318,6 +1340,24 @@ pub const table = [_]Entry{
.{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx },
+ .{ .vpmaxsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_128_wig, .avx }, // VEX.128 packed maximum, signed bytes
+ .{ .vpmaxsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_128_wig, .avx }, // VEX.128 packed maximum, signed words
+ .{ .vpmaxsd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_128_wig, .avx }, // VEX.128 packed maximum, signed dwords
+
+ .{ .vpmaxub, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xde }, 0, .vex_128_wig, .avx }, // VEX.128 packed maximum, unsigned bytes
+ .{ .vpmaxuw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_128_wig, .avx }, // VEX.128 packed maximum, unsigned words
+
+ .{ .vpmaxud, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_128_wig, .avx }, // VEX.128 packed maximum, unsigned dwords
+
+ .{ .vpminsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_128_wig, .avx }, // VEX.128 packed minimum, signed bytes
+ .{ .vpminsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xea }, 0, .vex_128_wig, .avx }, // VEX.128 packed minimum, signed words
+ .{ .vpminsd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_128_wig, .avx }, // VEX.128 packed minimum, signed dwords
+
+ .{ .vpminub, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xda }, 0, .vex_128_wig, .avx }, // VEX.128 packed minimum, unsigned bytes
+ .{ .vpminuw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_128_wig, .avx }, // VEX.128 packed minimum, unsigned words
+
+ .{ .vpminud, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_128_wig, .avx }, // VEX.128 packed minimum, unsigned dwords
+
.{ .vpmulhw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx },
.{ .vpmulld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx },
@@ -1449,6 +1489,24 @@ pub const table = [_]Entry{
.{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
+ // VEX.256 (ymm) packed integer max/min. The 256-bit integer forms are AVX2
+ // instructions, so they carry the .avx2 feature like the ymm vpandn entry
+ // above; tagging them .avx would let the encoder accept them on AVX-only
+ // targets where they are not available.
+ .{ .vpmaxsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmaxsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmaxsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpmaxub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xde }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmaxuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpmaxud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpminsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpminsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xea }, 0, .vex_256_wig, .avx2 },
+ .{ .vpminsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpminub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xda }, 0, .vex_256_wig, .avx2 },
+ .{ .vpminuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpminud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_256_wig, .avx2 },
+
.{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx },
.{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx },
src/arch/x86_64/Mir.zig
@@ -453,6 +453,14 @@ pub const Inst = struct {
/// Bitwise logical and not of packed single-precision floating-point values
/// Bitwise logical and not of packed double-precision floating-point values
andn,
+ /// Maximum of packed signed integers (pmaxsb/pmaxsw/pmaxsd and their VEX forms)
+ maxs,
+ /// Maximum of packed unsigned integers (pmaxub/pmaxuw/pmaxud and their VEX forms)
+ maxu,
+ /// Minimum of packed signed integers (pminsb/pminsw/pminsd and their VEX forms)
+ mins,
+ /// Minimum of packed unsigned integers (pminub/pminuw/pminud and their VEX forms)
+ minu,
/// Multiply packed signed integers and store low result
mull,
/// Multiply packed signed integers and store high result