Commit bd771bec49
Changed files (4)
src
arch
src/arch/x86_64/CodeGen.zig
@@ -6520,6 +6520,57 @@ fn genBinOp(
},
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
else => null,
+ .Int => switch (lhs_ty.childType().intInfo(self.target.*).bits) {
+ 8 => switch (lhs_ty.vectorLen()) {
+ 1...16 => switch (air_tag) {
+ .add,
+ .addwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_b, .add } else .{ .p_b, .add },
+ .sub,
+ .subwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_b, .sub } else .{ .p_b, .sub },
+ else => null,
+ },
+ else => null,
+ },
+ 16 => switch (lhs_ty.vectorLen()) {
+ 1...8 => switch (air_tag) {
+ .add,
+ .addwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_w, .add } else .{ .p_w, .add },
+ .sub,
+ .subwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_w, .sub } else .{ .p_w, .sub },
+ else => null,
+ },
+ else => null,
+ },
+ 32 => switch (lhs_ty.vectorLen()) {
+ 1...4 => switch (air_tag) {
+ .add,
+ .addwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_d, .add } else .{ .p_d, .add },
+ .sub,
+ .subwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_d, .sub } else .{ .p_d, .sub },
+ else => null,
+ },
+ else => null,
+ },
+ 64 => switch (lhs_ty.vectorLen()) {
+ 1...2 => switch (air_tag) {
+ .add,
+ .addwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_q, .add } else .{ .p_q, .add },
+ .sub,
+ .subwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_q, .sub } else .{ .p_q, .sub },
+ else => null,
+ },
+ else => null,
+ },
+ else => null,
+ },
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
16 => if (self.hasFeature(.f16c)) switch (lhs_ty.vectorLen()) {
1 => {
@@ -6812,7 +6863,7 @@ fn genBinOp(
);
}
switch (air_tag) {
- .add, .sub, .mul, .div_float, .div_exact => {},
+ .add, .addwrap, .sub, .subwrap, .mul, .mulwrap, .div_float, .div_exact => {},
.div_trunc, .div_floor => try self.genRound(
lhs_ty,
dst_reg,
@@ -9043,14 +9094,33 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
.{ .register = try self.copyToTmpRegister(ty, src_mcv) },
),
.sse => try self.asmRegisterRegister(
- switch (ty.scalarType().zigTypeTag()) {
- else => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa },
+ if (@as(?Mir.Inst.FixedTag, switch (ty.scalarType().zigTypeTag()) {
+ else => switch (abi_size) {
+ 1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
+ 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
+ 9...16 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa },
+ 17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null,
+ else => null,
+ },
.Float => switch (ty.floatBits(self.target.*)) {
- else => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa },
+ 16, 128 => switch (abi_size) {
+ 2...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
+ 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
+ 9...16 => if (self.hasFeature(.avx))
+ .{ .v_, .movdqa }
+ else
+ .{ ._, .movdqa },
+ 17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null,
+ else => null,
+ },
32 => if (self.hasFeature(.avx)) .{ .v_ps, .mova } else .{ ._ps, .mova },
64 => if (self.hasFeature(.avx)) .{ .v_pd, .mova } else .{ ._pd, .mova },
+ 80 => null,
+ else => unreachable,
},
- },
+ })) |tag| tag else return self.fail("TODO implement genSetReg for {}", .{
+ ty.fmt(self.bin_file.options.module.?),
+ }),
registerAlias(dst_reg, abi_size),
registerAlias(src_reg, abi_size),
),
src/arch/x86_64/Encoding.zig
@@ -262,7 +262,9 @@ pub const Mnemonic = enum {
fisttp, fld,
// MMX
movd, movq,
+ paddb, paddd, paddq, paddsb, paddsw, paddusb, paddusw, paddw,
pand, pandn, por, pxor,
+ psubb, psubd, psubq, psubsb, psubsw, psubusb, psubusw, psubw,
// SSE
addps, addss,
andps,
@@ -341,12 +343,14 @@ pub const Mnemonic = enum {
vmovupd, vmovups,
vmulpd, vmulps, vmulsd, vmulss,
vorpd, vorps,
+ vpaddb, vpaddd, vpaddq, vpaddsb, vpaddsw, vpaddusb, vpaddusw, vpaddw,
vpand, vpandn,
vpextrb, vpextrd, vpextrq, vpextrw,
vpinsrb, vpinsrd, vpinsrq, vpinsrw,
vpor,
vpshufhw, vpshuflw,
vpsrld, vpsrlq, vpsrlw,
+ vpsubb, vpsubd, vpsubq, vpsubsb, vpsubsw, vpsubusb, vpsubusw, vpsubw,
vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd,
vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd,
vpxor,
@@ -746,7 +750,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op
}
const mnemonic_to_encodings_map = init: {
- @setEvalBranchQuota(25_000);
+ @setEvalBranchQuota(30_000);
const encodings = @import("encodings.zig");
var entries = encodings.table;
std.sort.sort(encodings.Entry, &entries, {}, struct {
src/arch/x86_64/encodings.zig
@@ -992,6 +992,17 @@ pub const table = [_]Entry{
.{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 },
+ .{ .paddb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfc }, 0, .none, .sse2 },
+ .{ .paddw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfd }, 0, .none, .sse2 },
+ .{ .paddd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfe }, 0, .none, .sse2 },
+ .{ .paddq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd4 }, 0, .none, .sse2 },
+
+ .{ .paddsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xec }, 0, .none, .sse2 },
+ .{ .paddsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xed }, 0, .none, .sse2 },
+
+ .{ .paddusb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdc }, 0, .none, .sse2 },
+ .{ .paddusw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdd }, 0, .none, .sse2 },
+
.{ .pand, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .none, .sse2 },
.{ .pandn, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .none, .sse2 },
@@ -1013,6 +1024,18 @@ pub const table = [_]Entry{
.{ .psrlq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .none, .sse2 },
.{ .psrlq, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .none, .sse2 },
+ .{ .psubb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xf8 }, 0, .none, .sse2 },
+ .{ .psubw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xf9 }, 0, .none, .sse2 },
+ .{ .psubd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfa }, 0, .none, .sse2 },
+
+ .{ .psubsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe8 }, 0, .none, .sse2 },
+ .{ .psubsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe9 }, 0, .none, .sse2 },
+
+ .{ .psubq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfb }, 0, .none, .sse2 },
+
+ .{ .psubusb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd8 }, 0, .none, .sse2 },
+ .{ .psubusw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd9 }, 0, .none, .sse2 },
+
.{ .punpckhbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .none, .sse2 },
.{ .punpckhwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .none, .sse2 },
.{ .punpckhdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .none, .sse2 },
@@ -1261,6 +1284,17 @@ pub const table = [_]Entry{
.{ .vorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .vex_128_wig, .avx },
.{ .vorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x56 }, 0, .vex_256_wig, .avx },
+ .{ .vpaddb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfc }, 0, .vex_128_wig, .avx },
+ .{ .vpaddw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfd }, 0, .vex_128_wig, .avx },
+ .{ .vpaddd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfe }, 0, .vex_128_wig, .avx },
+ .{ .vpaddq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd4 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpaddsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xec }, 0, .vex_128_wig, .avx },
+ .{ .vpaddsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xed }, 0, .vex_128_wig, .avx },
+
+ .{ .vpaddusb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdc }, 0, .vex_128_wig, .avx },
+ .{ .vpaddusw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdd }, 0, .vex_128_wig, .avx },
+
.{ .vpand, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_128_wig, .avx },
.{ .vpandn, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_128_wig, .avx },
@@ -1287,6 +1321,18 @@ pub const table = [_]Entry{
.{ .vpsrlq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_128_wig, .avx },
.{ .vpsrlq, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_128_wig, .avx },
+ .{ .vpsubb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xf8 }, 0, .vex_128_wig, .avx },
+ .{ .vpsubw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xf9 }, 0, .vex_128_wig, .avx },
+ .{ .vpsubd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfa }, 0, .vex_128_wig, .avx },
+
+ .{ .vpsubsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe8 }, 0, .vex_128_wig, .avx },
+ .{ .vpsubsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe9 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpsubq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfb }, 0, .vex_128_wig, .avx },
+
+ .{ .vpsubusb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd8 }, 0, .vex_128_wig, .avx },
+ .{ .vpsubusw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd9 }, 0, .vex_128_wig, .avx },
+
.{ .vpunpckhbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_128_wig, .avx },
.{ .vpunpckhwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_128_wig, .avx },
.{ .vpunpckhdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_128_wig, .avx },
@@ -1376,6 +1422,17 @@ pub const table = [_]Entry{
.{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
.{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
+ .{ .vpaddb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfc }, 0, .vex_256_wig, .avx2 },
+ .{ .vpaddw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfd }, 0, .vex_256_wig, .avx2 },
+ .{ .vpaddd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfe }, 0, .vex_256_wig, .avx2 },
+ .{ .vpaddq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd4 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpaddsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xec }, 0, .vex_256_wig, .avx2 },
+ .{ .vpaddsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xed }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpaddusb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdc }, 0, .vex_256_wig, .avx2 },
+ .{ .vpaddusw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdd }, 0, .vex_256_wig, .avx2 },
+
.{ .vpand, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_256_wig, .avx2 },
.{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
@@ -1389,6 +1446,18 @@ pub const table = [_]Entry{
.{ .vpsrlq, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_256_wig, .avx2 },
.{ .vpsrlq, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_256_wig, .avx2 },
+ .{ .vpsubb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xf8 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsubw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xf9 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsubd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfa }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpsubsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe8 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsubsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe9 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpsubq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfb }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpsubusb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd8 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsubusw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd9 }, 0, .vex_256_wig, .avx2 },
+
.{ .vpunpckhbw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_256_wig, .avx2 },
.{ .vpunpckhwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_256_wig, .avx2 },
.{ .vpunpckhdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_256_wig, .avx2 },
src/arch/x86_64/Mir.zig
@@ -288,6 +288,7 @@ pub const Inst = struct {
/// Add with carry
adc,
/// Add
+ /// Add packed integers
/// Add packed single-precision floating-point values
/// Add scalar single-precision floating-point values
/// Add packed double-precision floating-point values
@@ -420,6 +421,7 @@ pub const Inst = struct {
/// Double precision shift right
sh,
/// Subtract
+ /// Subtract packed integers
/// Subtract packed single-precision floating-point values
/// Subtract scalar single-precision floating-point values
/// Subtract packed double-precision floating-point values
@@ -444,9 +446,18 @@ pub const Inst = struct {
/// Bitwise logical xor of packed double-precision floating-point values
xor,
+ /// Add packed signed integers with signed saturation
+ adds,
+ /// Add packed unsigned integers with unsigned saturation
+ addus,
/// Bitwise logical and not of packed single-precision floating-point values
/// Bitwise logical and not of packed double-precision floating-point values
andn,
+ /// Subtract packed signed integers with signed saturation
+ subs,
+ /// Subtract packed unsigned integers with unsigned saturation
+ subus,
+
/// Convert packed doubleword integers to packed single-precision floating-point values
/// Convert packed doubleword integers to packed double-precision floating-point values
cvtpi2,