Commit 403c2d91be
Changed files (7)
test/behavior/maximum_minimum.zig
src/arch/x86_64/CodeGen.zig
@@ -1271,6 +1271,27 @@ fn asmRegisterRegisterRegister(
});
}
+fn asmRegisterRegisterRegisterRegister(
+ self: *Self,
+ tag: Mir.Inst.FixedTag,
+ reg1: Register,
+ reg2: Register,
+ reg3: Register,
+ reg4: Register,
+) !void {
+ _ = try self.addInst(.{
+ .tag = tag[1],
+ .ops = .rrrr,
+ .data = .{ .rrrr = .{
+ .fixes = tag[0],
+ .r1 = reg1,
+ .r2 = reg2,
+ .r3 = reg3,
+ .r4 = reg4,
+ } },
+ });
+}
+
fn asmRegisterRegisterRegisterImmediate(
self: *Self,
tag: Mir.Inst.FixedTag,
@@ -6224,12 +6245,26 @@ fn genBinOp(
lhs_air: Air.Inst.Ref,
rhs_air: Air.Inst.Ref,
) !MCValue {
- const lhs_mcv = try self.resolveInst(lhs_air);
- const rhs_mcv = try self.resolveInst(rhs_air);
const lhs_ty = self.air.typeOf(lhs_air);
const rhs_ty = self.air.typeOf(rhs_air);
const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*));
+ const maybe_mask_reg = switch (air_tag) {
+ else => null,
+ .max, .min => if (lhs_ty.scalarType().isRuntimeFloat()) registerAlias(
+ if (!self.hasFeature(.avx) and self.hasFeature(.sse4_1)) mask: {
+ try self.register_manager.getReg(.xmm0, null);
+ break :mask .xmm0;
+ } else try self.register_manager.allocReg(null, sse),
+ abi_size,
+ ) else null,
+ };
+ const mask_lock =
+ if (maybe_mask_reg) |mask_reg| self.register_manager.lockRegAssumeUnused(mask_reg) else null;
+ defer if (mask_lock) |lock| self.register_manager.unlockReg(lock);
+
+ const lhs_mcv = try self.resolveInst(lhs_air);
+ const rhs_mcv = try self.resolveInst(rhs_air);
switch (lhs_mcv) {
.immediate => |imm| switch (imm) {
0 => switch (air_tag) {
@@ -6300,7 +6335,16 @@ fn genBinOp(
};
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
- const src_mcv = if (flipped) lhs_mcv else rhs_mcv;
+ const unmat_src_mcv = if (flipped) lhs_mcv else rhs_mcv;
+ const src_mcv: MCValue = if (maybe_mask_reg) |mask_reg|
+ if (self.hasFeature(.avx) and unmat_src_mcv.isRegister() and maybe_inst != null and
+ self.liveness.operandDies(maybe_inst.?, if (flipped) 0 else 1)) unmat_src_mcv else src: {
+ try self.genSetReg(mask_reg, rhs_ty, unmat_src_mcv);
+ break :src .{ .register = mask_reg };
+ }
+ else
+ unmat_src_mcv;
+
if (!vec_op) {
switch (air_tag) {
.add,
@@ -7009,18 +7053,26 @@ fn genBinOp(
})) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
@tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
});
+
+ const lhs_copy_reg = if (maybe_mask_reg) |_| registerAlias(
+ if (copied_to_dst) try self.copyToTmpRegister(lhs_ty, dst_mcv) else lhs_mcv.getReg().?,
+ abi_size,
+ ) else null;
+ const lhs_copy_lock = if (lhs_copy_reg) |reg| self.register_manager.lockReg(reg) else null;
+ defer if (lhs_copy_lock) |lock| self.register_manager.unlockReg(lock);
+
if (self.hasFeature(.avx)) {
- const src1_alias =
+ const lhs_reg =
if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size);
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
mir_tag,
dst_reg,
- src1_alias,
+ lhs_reg,
src_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
) else try self.asmRegisterRegisterRegister(
mir_tag,
dst_reg,
- src1_alias,
+ lhs_reg,
registerAlias(if (src_mcv.isRegister())
src_mcv.getReg().?
else
@@ -7041,9 +7093,10 @@ fn genBinOp(
try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size),
);
}
+
switch (air_tag) {
.add, .addwrap, .sub, .subwrap, .mul, .mulwrap, .div_float, .div_exact => {},
- .div_trunc, .div_floor => try self.genRound(
+ .div_trunc, .div_floor => if (self.hasFeature(.sse4_1)) try self.genRound(
lhs_ty,
dst_reg,
.{ .register = dst_reg },
@@ -7052,11 +7105,240 @@ fn genBinOp(
.div_floor => 0b1_0_01,
else => unreachable,
},
- ),
+ ) else return self.fail("TODO implement genBinOp for {s} {} without sse4_1 feature", .{
+ @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
+ }),
.bit_and, .bit_or, .xor => {},
- .max, .min => {}, // TODO: unordered select
+ .max, .min => if (maybe_mask_reg) |mask_reg| if (self.hasFeature(.avx)) {
+ const rhs_copy_reg = registerAlias(src_mcv.getReg().?, abi_size);
+
+ try self.asmRegisterRegisterRegisterImmediate(
+ if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
+ .Float => switch (lhs_ty.floatBits(self.target.*)) {
+ 32 => .{ .v_ss, .cmp },
+ 64 => .{ .v_sd, .cmp },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ .Vector => switch (lhs_ty.childType().zigTypeTag()) {
+ .Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
+ 32 => switch (lhs_ty.vectorLen()) {
+ 1 => .{ .v_ss, .cmp },
+ 2...8 => .{ .v_ps, .cmp },
+ else => null,
+ },
+ 64 => switch (lhs_ty.vectorLen()) {
+ 1 => .{ .v_sd, .cmp },
+ 2...4 => .{ .v_pd, .cmp },
+ else => null,
+ },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ else => unreachable,
+ },
+ else => unreachable,
+ })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
+ @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
+ }),
+ mask_reg,
+ rhs_copy_reg,
+ rhs_copy_reg,
+ Immediate.u(3), // unord
+ );
+ try self.asmRegisterRegisterRegisterRegister(
+ if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
+ .Float => switch (lhs_ty.floatBits(self.target.*)) {
+ 32 => .{ .v_ps, .blendv },
+ 64 => .{ .v_pd, .blendv },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ .Vector => switch (lhs_ty.childType().zigTypeTag()) {
+ .Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
+ 32 => switch (lhs_ty.vectorLen()) {
+ 1...8 => .{ .v_ps, .blendv },
+ else => null,
+ },
+ 64 => switch (lhs_ty.vectorLen()) {
+ 1...4 => .{ .v_pd, .blendv },
+ else => null,
+ },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ else => unreachable,
+ },
+ else => unreachable,
+ })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
+ @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
+ }),
+ dst_reg,
+ dst_reg,
+ lhs_copy_reg.?,
+ mask_reg,
+ );
+ } else {
+ const has_blend = self.hasFeature(.sse4_1);
+ try self.asmRegisterRegisterImmediate(
+ if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
+ .Float => switch (lhs_ty.floatBits(self.target.*)) {
+ 32 => .{ ._ss, .cmp },
+ 64 => .{ ._sd, .cmp },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ .Vector => switch (lhs_ty.childType().zigTypeTag()) {
+ .Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
+ 32 => switch (lhs_ty.vectorLen()) {
+ 1 => .{ ._ss, .cmp },
+ 2...4 => .{ ._ps, .cmp },
+ else => null,
+ },
+ 64 => switch (lhs_ty.vectorLen()) {
+ 1 => .{ ._sd, .cmp },
+ 2 => .{ ._pd, .cmp },
+ else => null,
+ },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ else => unreachable,
+ },
+ else => unreachable,
+ })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
+ @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
+ }),
+ mask_reg,
+ mask_reg,
+ Immediate.u(if (has_blend) 3 else 7), // unord, ord
+ );
+ if (has_blend) try self.asmRegisterRegisterRegister(
+ if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
+ .Float => switch (lhs_ty.floatBits(self.target.*)) {
+ 32 => .{ ._ps, .blendv },
+ 64 => .{ ._pd, .blendv },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ .Vector => switch (lhs_ty.childType().zigTypeTag()) {
+ .Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
+ 32 => switch (lhs_ty.vectorLen()) {
+ 1...4 => .{ ._ps, .blendv },
+ else => null,
+ },
+ 64 => switch (lhs_ty.vectorLen()) {
+ 1...2 => .{ ._pd, .blendv },
+ else => null,
+ },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ else => unreachable,
+ },
+ else => unreachable,
+ })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
+ @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
+ }),
+ dst_reg,
+ lhs_copy_reg.?,
+ mask_reg,
+ ) else {
+ try self.asmRegisterRegister(
+ if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
+ .Float => switch (lhs_ty.floatBits(self.target.*)) {
+ 32 => .{ ._ps, .@"and" },
+ 64 => .{ ._pd, .@"and" },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ .Vector => switch (lhs_ty.childType().zigTypeTag()) {
+ .Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
+ 32 => switch (lhs_ty.vectorLen()) {
+ 1...4 => .{ ._ps, .@"and" },
+ else => null,
+ },
+ 64 => switch (lhs_ty.vectorLen()) {
+ 1...2 => .{ ._pd, .@"and" },
+ else => null,
+ },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ else => unreachable,
+ },
+ else => unreachable,
+ })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
+ @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
+ }),
+ dst_reg,
+ mask_reg,
+ );
+ try self.asmRegisterRegister(
+ if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
+ .Float => switch (lhs_ty.floatBits(self.target.*)) {
+ 32 => .{ ._ps, .andn },
+ 64 => .{ ._pd, .andn },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ .Vector => switch (lhs_ty.childType().zigTypeTag()) {
+ .Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
+ 32 => switch (lhs_ty.vectorLen()) {
+ 1...4 => .{ ._ps, .andn },
+ else => null,
+ },
+ 64 => switch (lhs_ty.vectorLen()) {
+ 1...2 => .{ ._pd, .andn },
+ else => null,
+ },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ else => unreachable,
+ },
+ else => unreachable,
+ })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
+ @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
+ }),
+ mask_reg,
+ lhs_copy_reg.?,
+ );
+ try self.asmRegisterRegister(
+ if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) {
+ .Float => switch (lhs_ty.floatBits(self.target.*)) {
+ 32 => .{ ._ps, .@"or" },
+ 64 => .{ ._pd, .@"or" },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ .Vector => switch (lhs_ty.childType().zigTypeTag()) {
+ .Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
+ 32 => switch (lhs_ty.vectorLen()) {
+ 1...4 => .{ ._ps, .@"or" },
+ else => null,
+ },
+ 64 => switch (lhs_ty.vectorLen()) {
+ 1...2 => .{ ._pd, .@"or" },
+ else => null,
+ },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ else => unreachable,
+ },
+ else => unreachable,
+ })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{
+ @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?),
+ }),
+ dst_reg,
+ mask_reg,
+ );
+ }
+ },
else => unreachable,
}
+
return dst_mcv;
}
@@ -9282,7 +9564,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null,
else => null,
},
- .Float => switch (ty.floatBits(self.target.*)) {
+ .Float => switch (ty.scalarType().floatBits(self.target.*)) {
16, 128 => switch (abi_size) {
2...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
src/arch/x86_64/encoder.zig
@@ -226,8 +226,8 @@ pub const Instruction = struct {
else => {
const mem_op = switch (data.op_en) {
.m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0],
- .rm, .rmi, .vmi => inst.ops[1],
- .rvm, .rvmi => inst.ops[2],
+ .rm, .rmi, .rm0, .vmi => inst.ops[1],
+ .rvm, .rvmr, .rvmi => inst.ops[2],
else => unreachable,
};
switch (mem_op) {
@@ -235,7 +235,7 @@ pub const Instruction = struct {
const rm = switch (data.op_en) {
.m, .mi, .m1, .mc, .vmi => enc.modRmExt(),
.mr, .mri, .mrc => inst.ops[1].reg.lowEnc(),
- .rm, .rmi, .rvm, .rvmi => inst.ops[0].reg.lowEnc(),
+ .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0].reg.lowEnc(),
.mvr => inst.ops[2].reg.lowEnc(),
else => unreachable,
};
@@ -245,7 +245,7 @@ pub const Instruction = struct {
const op = switch (data.op_en) {
.m, .mi, .m1, .mc, .vmi => .none,
.mr, .mri, .mrc => inst.ops[1],
- .rm, .rmi, .rvm, .rvmi => inst.ops[0],
+ .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0],
.mvr => inst.ops[2],
else => unreachable,
};
@@ -257,6 +257,7 @@ pub const Instruction = struct {
switch (data.op_en) {
.mi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder),
.rmi, .mri, .vmi => try encodeImm(inst.ops[2].imm, data.ops[2], encoder),
+ .rvmr => try encoder.imm8(@as(u8, inst.ops[3].reg.enc()) << 4),
.rvmi => try encodeImm(inst.ops[3].imm, data.ops[3], encoder),
else => {},
}
@@ -298,7 +299,7 @@ pub const Instruction = struct {
.i, .zi, .o, .oi, .d, .np => null,
.fd => inst.ops[1].mem.base().reg,
.td => inst.ops[0].mem.base().reg,
- .rm, .rmi => if (inst.ops[1].isSegmentRegister())
+ .rm, .rmi, .rm0 => if (inst.ops[1].isSegmentRegister())
switch (inst.ops[1]) {
.reg => |reg| reg,
.mem => |mem| mem.base().reg,
@@ -314,7 +315,7 @@ pub const Instruction = struct {
}
else
null,
- .vmi, .rvm, .rvmi, .mvr => unreachable,
+ .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable,
};
if (segment_override) |seg| {
legacy.setSegmentOverride(seg);
@@ -333,23 +334,23 @@ pub const Instruction = struct {
switch (op_en) {
.np, .i, .zi, .fd, .td, .d => {},
.o, .oi => rex.b = inst.ops[0].reg.isExtended(),
- .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc => {
+ .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0 => {
const r_op = switch (op_en) {
- .rm, .rmi => inst.ops[0],
+ .rm, .rmi, .rm0 => inst.ops[0],
.mr, .mri, .mrc => inst.ops[1],
else => .none,
};
rex.r = r_op.isBaseExtended();
const b_x_op = switch (op_en) {
- .rm, .rmi => inst.ops[1],
+ .rm, .rmi, .rm0 => inst.ops[1],
.m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0],
else => unreachable,
};
rex.b = b_x_op.isBaseExtended();
rex.x = b_x_op.isIndexExtended();
},
- .vmi, .rvm, .rvmi, .mvr => unreachable,
+ .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable,
}
try encoder.rex(rex);
@@ -367,9 +368,9 @@ pub const Instruction = struct {
switch (op_en) {
.np, .i, .zi, .fd, .td, .d => {},
.o, .oi => vex.b = inst.ops[0].reg.isExtended(),
- .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .vmi, .rvm, .rvmi, .mvr => {
+ .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr => {
const r_op = switch (op_en) {
- .rm, .rmi, .rvm, .rvmi => inst.ops[0],
+ .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0],
.mr, .mri, .mrc => inst.ops[1],
.mvr => inst.ops[2],
.m, .mi, .m1, .mc, .vmi => .none,
@@ -378,9 +379,9 @@ pub const Instruction = struct {
vex.r = r_op.isBaseExtended();
const b_x_op = switch (op_en) {
- .rm, .rmi, .vmi => inst.ops[1],
+ .rm, .rmi, .rm0, .vmi => inst.ops[1],
.m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0],
- .rvm, .rvmi => inst.ops[2],
+ .rvm, .rvmr, .rvmi => inst.ops[2],
else => unreachable,
};
vex.b = b_x_op.isBaseExtended();
@@ -408,7 +409,7 @@ pub const Instruction = struct {
switch (op_en) {
else => {},
.vmi => vex.v = inst.ops[0].reg,
- .rvm, .rvmi => vex.v = inst.ops[1].reg,
+ .rvm, .rvmr, .rvmi => vex.v = inst.ops[1].reg,
}
try encoder.vex(vex);
src/arch/x86_64/Encoding.zig
@@ -178,7 +178,7 @@ pub fn format(
try writer.print("+{s} ", .{tag});
},
.m, .mi, .m1, .mc, .vmi => try writer.print("/{d} ", .{encoding.modRmExt()}),
- .mr, .rm, .rmi, .mri, .mrc, .rvm, .rvmi, .mvr => try writer.writeAll("/r "),
+ .mr, .rm, .rmi, .mri, .mrc, .rm0, .rvm, .rvmr, .rvmi, .mvr => try writer.writeAll("/r "),
}
switch (encoding.data.op_en) {
@@ -202,7 +202,8 @@ pub fn format(
};
try writer.print("{s} ", .{tag});
},
- .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rvm, .mvr => {},
+ .rvmr => try writer.writeAll("/is4 "),
+ .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .rvm, .mvr => {},
}
try writer.print("{s} ", .{@tagName(encoding.mnemonic)});
@@ -270,7 +271,7 @@ pub const Mnemonic = enum {
addps, addss,
andps,
andnps,
- cmpss,
+ cmpps, cmpss,
cvtpi2ps, cvtps2pi, cvtsi2ss, cvtss2si, cvttps2pi, cvttss2si,
divps, divss,
maxps, maxss,
@@ -290,7 +291,7 @@ pub const Mnemonic = enum {
addpd, addsd,
andpd,
andnpd,
- //cmpsd,
+ cmppd, //cmpsd,
cvtdq2pd, cvtdq2ps, cvtpd2dq, cvtpd2pi, cvtpd2ps, cvtpi2pd,
cvtps2dq, cvtps2pd, cvtsd2si, cvtsd2ss, cvtsi2sd, cvtss2sd,
cvttpd2dq, cvttpd2pi, cvttps2dq, cvttsd2si,
@@ -315,6 +316,7 @@ pub const Mnemonic = enum {
// SSE3
movddup, movshdup, movsldup,
// SSE4.1
+ blendpd, blendps, blendvpd, blendvps,
extractps,
insertps,
pextrb, pextrd, pextrq,
@@ -325,7 +327,9 @@ pub const Mnemonic = enum {
// AVX
vaddpd, vaddps, vaddsd, vaddss,
vandnpd, vandnps, vandpd, vandps,
+ vblendpd, vblendps, vblendvpd, vblendvps,
vbroadcastf128, vbroadcastsd, vbroadcastss,
+ vcmppd, vcmpps, vcmpsd, vcmpss,
vcvtdq2pd, vcvtdq2ps, vcvtpd2dq, vcvtpd2ps,
vcvtps2dq, vcvtps2pd, vcvtsd2si, vcvtsd2ss,
vcvtsi2sd, vcvtsi2ss, vcvtss2sd, vcvtss2si,
@@ -385,7 +389,7 @@ pub const OpEn = enum {
fd, td,
m1, mc, mi, mr, rm,
rmi, mri, mrc,
- vmi, rvm, rvmi, mvr,
+ rm0, vmi, rvm, rvmr, rvmi, mvr,
// zig fmt: on
};
@@ -407,7 +411,7 @@ pub const Op = enum {
moffs,
sreg,
st, mm, mm_m64,
- xmm, xmm_m32, xmm_m64, xmm_m128,
+ xmm0, xmm, xmm_m32, xmm_m64, xmm_m128,
ymm, ymm_m256,
// zig fmt: on
@@ -436,7 +440,9 @@ pub const Op = enum {
.segment => .sreg,
.x87 => .st,
.mmx => .mm,
- .sse => switch (reg.bitSize()) {
+ .sse => if (reg == .xmm0)
+ .xmm0
+ else switch (reg.bitSize()) {
128 => .xmm,
256 => .ymm,
else => unreachable,
@@ -494,7 +500,7 @@ pub const Op = enum {
.eax, .r32, .rm32, .r32_m16 => unreachable,
.rax, .r64, .rm64, .r64_m16 => unreachable,
.st, .mm, .mm_m64 => unreachable,
- .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable,
+ .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable,
.ymm, .ymm_m256 => unreachable,
.m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable,
.unity => 1,
@@ -516,7 +522,7 @@ pub const Op = enum {
.eax, .r32, .rm32, .r32_m8, .r32_m16 => 32,
.rax, .r64, .rm64, .r64_m16, .mm, .mm_m64 => 64,
.st => 80,
- .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128,
+ .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128,
.ymm, .ymm_m256 => 256,
};
}
@@ -526,7 +532,8 @@ pub const Op = enum {
.none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable,
.unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable,
.rel8, .rel16, .rel32 => unreachable,
- .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .st, .mm, .xmm, .ymm => unreachable,
+ .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64 => unreachable,
+ .st, .mm, .xmm0, .xmm, .ymm => unreachable,
.m8, .rm8, .r32_m8 => 8,
.m16, .rm16, .r32_m16, .r64_m16 => 16,
.m32, .rm32, .xmm_m32 => 32,
@@ -558,7 +565,7 @@ pub const Op = enum {
.rm8, .rm16, .rm32, .rm64,
.r32_m8, .r32_m16, .r64_m16,
.st, .mm, .mm_m64,
- .xmm, .xmm_m32, .xmm_m64, .xmm_m128,
+ .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128,
.ymm, .ymm_m256,
=> true,
else => false,
@@ -612,7 +619,7 @@ pub const Op = enum {
.sreg => .segment,
.st => .x87,
.mm, .mm_m64 => .mmx,
- .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .sse,
+ .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .sse,
.ymm, .ymm_m256 => .sse,
};
}
@@ -629,7 +636,7 @@ pub const Op = enum {
else => {
if (op.isRegister() and target.isRegister()) {
return switch (target) {
- .cl, .al, .ax, .eax, .rax => op == target,
+ .cl, .al, .ax, .eax, .rax, .xmm0 => op == target,
else => op.class() == target.class() and op.regBitSize() == target.regBitSize(),
};
}
src/arch/x86_64/encodings.zig
@@ -846,6 +846,8 @@ pub const table = [_]Entry{
.{ .andps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .none, .sse },
+ .{ .cmpps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc2 }, 0, .none, .sse },
+
.{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .none, .sse },
.{ .cvtpi2ps, .rm, &.{ .xmm, .mm_m64 }, &.{ 0x0f, 0x2a }, 0, .none, .sse },
@@ -917,6 +919,8 @@ pub const table = [_]Entry{
.{ .andpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .none, .sse2 },
+ .{ .cmppd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc2 }, 0, .none, .sse2 },
+
.{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .none, .sse2 },
.{ .cvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .none, .sse2 },
@@ -1085,6 +1089,14 @@ pub const table = [_]Entry{
.{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 },
// SSE4.1
+ .{ .blendpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .none, .sse4_1 },
+
+ .{ .blendps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .none, .sse4_1 },
+
+ .{ .blendvpd, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x15 }, 0, .none, .sse4_1 },
+
+ .{ .blendvps, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x14 }, 0, .none, .sse4_1 },
+
.{ .extractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .none, .sse4_1 },
.{ .insertps, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .none, .sse4_1 },
@@ -1146,11 +1158,33 @@ pub const table = [_]Entry{
.{ .vandps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .vex_128_wig, .avx },
.{ .vandps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x54 }, 0, .vex_256_wig, .avx },
+ .{ .vblendpd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .vex_128_wig, .avx },
+ .{ .vblendpd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .vex_256_wig, .avx },
+
+ .{ .vblendps, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .vex_128_wig, .avx },
+ .{ .vblendps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .vex_256_wig, .avx },
+
+ .{ .vblendvpd, .rvmr, &.{ .xmm, .xmm, .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x3a, 0x4b }, 0, .vex_128_w0, .avx },
+ .{ .vblendvpd, .rvmr, &.{ .ymm, .ymm, .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x3a, 0x4b }, 0, .vex_256_w0, .avx },
+
+ .{ .vblendvps, .rvmr, &.{ .xmm, .xmm, .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x3a, 0x4a }, 0, .vex_128_w0, .avx },
+ .{ .vblendvps, .rvmr, &.{ .ymm, .ymm, .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x3a, 0x4a }, 0, .vex_256_w0, .avx },
+
.{ .vbroadcastss, .rm, &.{ .xmm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx },
.{ .vbroadcastss, .rm, &.{ .ymm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx },
.{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx },
.{ .vbroadcastf128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x1a }, 0, .vex_256_w0, .avx },
+ .{ .vcmppd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc2 }, 0, .vex_128_wig, .avx },
+ .{ .vcmppd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0xc2 }, 0, .vex_256_wig, .avx },
+
+ .{ .vcmpps, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc2 }, 0, .vex_128_wig, .avx },
+ .{ .vcmpps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x0f, 0xc2 }, 0, .vex_256_wig, .avx },
+
+ .{ .vcmpsd, .rvmi, &.{ .xmm, .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .vex_lig_wig, .avx },
+
+ .{ .vcmpss, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .vex_lig_wig, .avx },
+
.{ .vcvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx },
.{ .vcvtdq2pd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx },
src/arch/x86_64/Lower.zig
@@ -377,6 +377,7 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
.r => inst.data.r.fixes,
.rr => inst.data.rr.fixes,
.rrr => inst.data.rrr.fixes,
+ .rrrr => inst.data.rrrr.fixes,
.rrri => inst.data.rrri.fixes,
.rri_s, .rri_u => inst.data.rri.fixes,
.ri_s, .ri_u => inst.data.ri.fixes,
@@ -430,6 +431,12 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
.{ .reg = inst.data.rrr.r2 },
.{ .reg = inst.data.rrr.r3 },
},
+ .rrrr => &.{
+ .{ .reg = inst.data.rrrr.r1 },
+ .{ .reg = inst.data.rrrr.r2 },
+ .{ .reg = inst.data.rrrr.r3 },
+ .{ .reg = inst.data.rrrr.r4 },
+ },
.rrri => &.{
.{ .reg = inst.data.rrri.r1 },
.{ .reg = inst.data.rrri.r2 },
src/arch/x86_64/Mir.zig
@@ -596,6 +596,16 @@ pub const Inst = struct {
/// Replicate single floating-point values
movsldup,
+ /// Blend packed single-precision floating-point values
+ /// Blend scalar single-precision floating-point values
+ /// Blend packed double-precision floating-point values
+ /// Blend scalar double-precision floating-point values
+ blend,
+ /// Variable blend packed single-precision floating-point values
+ /// Variable blend scalar single-precision floating-point values
+ /// Variable blend packed double-precision floating-point values
+ /// Variable blend scalar double-precision floating-point values
+ blendv,
/// Extract packed floating-point values
extract,
/// Insert scalar single-precision floating-point value
@@ -651,6 +661,9 @@ pub const Inst = struct {
/// Register, register, register operands.
/// Uses `rrr` payload.
rrr,
+ /// Register, register, register, register operands.
+ /// Uses `rrrr` payload.
+ rrrr,
/// Register, register, register, immediate (byte) operands.
/// Uses `rrri` payload.
rrri,
@@ -870,6 +883,13 @@ pub const Inst = struct {
r2: Register,
r3: Register,
},
+ rrrr: struct {
+ fixes: Fixes = ._,
+ r1: Register,
+ r2: Register,
+ r3: Register,
+ r4: Register,
+ },
rrri: struct {
fixes: Fixes = ._,
r1: Register,
test/behavior/maximum_minimum.zig
@@ -24,7 +24,8 @@ test "@max" {
test "@max on vectors" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+ if (builtin.zig_backend == .stage2_x86_64 and
+ !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -72,7 +73,8 @@ test "@min" {
test "@min for vectors" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+ if (builtin.zig_backend == .stage2_x86_64 and
+ !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO