Commit 1f5aa7747f
Changed files (2): src/arch/x86_64/CodeGen.zig, src/arch/x86_64/Mir.zig
src/arch/x86_64/CodeGen.zig
@@ -2443,7 +2443,7 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
else
try self.copyToTmpRegister(src_ty, src_mcv);
try self.asmRegisterRegisterImmediate(
- .{ ._, .vcvtps2ph },
+ .{ .v_, .cvtps2ph },
dst_reg,
mat_src_reg.to128(),
Immediate.u(0b1_00),
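
Throughout the CodeGen.zig hunks, each Mir tag is split from one monolithic mnemonic into a (fixes, operation) pair: the old .{ ._, .vcvtps2ph } becomes .{ .v_, .cvtps2ph }, where the underscore in the fixes name marks the splice point for the operation. A minimal sketch of that folding, using only tag spellings visible in this diff (the fullMnemonic helper is illustrative, not part of the commit):

    const std = @import("std");

    // Fold a fixes spelling and an operation into the concrete mnemonic;
    // the '_' in the fixes marks where the operation is spliced in.
    fn fullMnemonic(buf: []u8, fixes: []const u8, op: []const u8) ![]const u8 {
        const split = std.mem.indexOfScalar(u8, fixes, '_') orelse return error.BadFixes;
        return std.fmt.bufPrint(buf, "{s}{s}{s}", .{ fixes[0..split], op, fixes[split + 1 ..] });
    }

    test "fixes + op => mnemonic" {
        var buf: [16]u8 = undefined;
        try std.testing.expectEqualStrings("vcvtps2ph", try fullMnemonic(&buf, "v_", "cvtps2ph"));
        try std.testing.expectEqualStrings("xorps", try fullMnemonic(&buf, "_ps", "xor"));
        try std.testing.expectEqualStrings("vpinsrw", try fullMnemonic(&buf, "vp_w", "insr"));
    }
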
@@ -2455,12 +2455,12 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
}
} else if (src_bits == 64 and dst_bits == 32) {
if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
- .{ ._, .vcvtsd2ss },
+ .{ .v_, .cvtsd2ss },
dst_reg,
dst_reg,
src_mcv.mem(.qword),
) else try self.asmRegisterRegisterRegister(
- .{ ._, .vcvtsd2ss },
+ .{ .v_, .cvtsd2ss },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
@@ -2506,22 +2506,22 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
src_mcv.getReg().?
else
try self.copyToTmpRegister(src_ty, src_mcv);
- try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, mat_src_reg.to128());
+ try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128());
switch (dst_bits) {
32 => {},
- 64 => try self.asmRegisterRegisterRegister(.{ ._, .vcvtss2sd }, dst_reg, dst_reg, dst_reg),
+ 64 => try self.asmRegisterRegisterRegister(.{ .v_, .cvtss2sd }, dst_reg, dst_reg, dst_reg),
else => return self.fail("TODO implement airFpext from {} to {}", .{
src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
}),
}
} else if (src_bits == 32 and dst_bits == 64) {
if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
- .{ ._, .vcvtss2sd },
+ .{ .v_, .cvtss2sd },
dst_reg,
dst_reg,
src_mcv.mem(.dword),
) else try self.asmRegisterRegisterRegister(
- .{ ._, .vcvtss2sd },
+ .{ .v_, .cvtss2sd },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
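
F16C only provides f16<->f32 conversions, so widening f16 to f64 in the hunk above chains vcvtph2ps with vcvtss2sd. The Zig-level meaning, as an illustrative sketch:

    const std = @import("std");

    fn fpextF16ToF64(x: f16) f64 {
        const mid = @as(f32, x); // vcvtph2ps
        return @as(f64, mid); // vcvtss2sd
    }

    test "f16 to f64 widens through f32" {
        try std.testing.expectEqual(@as(f64, 1.5), fpextF16ToF64(1.5));
    }
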
@@ -4678,8 +4678,8 @@ fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void {
try self.genBinOpMir(switch (ty_bits) {
// No point using an extra prefix byte for *pd which performs the same operation.
16, 32, 64, 128 => switch (tag) {
- .neg => .{ ._, .xorps },
- .fabs => .{ ._, .andnps },
+ .neg => .{ ._ps, .xor },
+ .fabs => .{ ._ps, .andn },
else => unreachable,
},
80 => return self.fail("TODO implement airFloatSign for {}", .{
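
For context on the ._ps choice above: negation flips the IEEE sign bit with an xor against a sign mask, and fabs clears it with andn (andnps computes NOT(dst) AND src, so with the sign mask on the destination side it strips the sign bit from the value). Because these are pure bitwise operations, the single-precision forms cover every lane width, as the comment in the hunk notes. The same tricks in scalar Zig (illustrative, not from the commit):

    const std = @import("std");

    test "sign-bit tricks behind airFloatSign" {
        const sign_mask: u32 = 0x8000_0000;
        const x: f32 = 1.5;
        const neg = @bitCast(f32, @bitCast(u32, x) ^ sign_mask); // xorps
        const abs = @bitCast(f32, ~sign_mask & @bitCast(u32, neg)); // andnps
        try std.testing.expectEqual(@as(f32, -1.5), neg);
        try std.testing.expectEqual(@as(f32, 1.5), abs);
    }
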
@@ -4712,23 +4712,23 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4
const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag()) {
.Float => switch (ty.floatBits(self.target.*)) {
- 32 => if (self.hasFeature(.avx)) .{ ._, .vroundss } else .{ ._, .roundss },
- 64 => if (self.hasFeature(.avx)) .{ ._, .vroundsd } else .{ ._, .roundsd },
+ 32 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
+ 64 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
16, 80, 128 => null,
else => unreachable,
},
.Vector => switch (ty.childType().zigTypeTag()) {
.Float => switch (ty.childType().floatBits(self.target.*)) {
32 => switch (ty.vectorLen()) {
- 1 => if (self.hasFeature(.avx)) .{ ._, .vroundss } else .{ ._, .roundss },
- 2...4 => if (self.hasFeature(.avx)) .{ ._, .vroundps } else .{ ._, .roundps },
- 5...8 => if (self.hasFeature(.avx)) .{ ._, .vroundps } else null,
+ 1 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
+ 2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else .{ ._ps, .round },
+ 5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else null,
else => null,
},
64 => switch (ty.vectorLen()) {
- 1 => if (self.hasFeature(.avx)) .{ ._, .vroundsd } else .{ ._, .roundsd },
- 2 => if (self.hasFeature(.avx)) .{ ._, .vroundpd } else .{ ._, .roundpd },
- 3...4 => if (self.hasFeature(.avx)) .{ ._, .vroundpd } else null,
+ 1 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
+ 2 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else .{ ._pd, .round },
+ 3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else null,
else => null,
},
16, 80, 128 => null,
@@ -4743,8 +4743,8 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4
const abi_size = @intCast(u32, ty.abiSize(self.target.*));
const dst_alias = registerAlias(dst_reg, abi_size);
- switch (mir_tag[1]) {
- .vroundss, .vroundsd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ switch (mir_tag[0]) {
+ .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
mir_tag,
dst_alias,
dst_alias,
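
With the operation collapsed to .round, the scalar-versus-packed distinction moves into the fixes half of the tag, so the dispatch above now switches on mir_tag[0] instead of mir_tag[1]: the AVX scalar forms (.v_ss, .v_sd) are the ones taking an extra source operand. A sketch of that shape with hypothetical local enums (Mir.Inst.FixedTag itself appears in the hunk; the enum names here are assumed):

    const Fixes = enum { _ss, v_ss, _sd, v_sd, _ps, v_ps, _pd, v_pd };
    const Op = enum { round };
    const FixedTag = struct { Fixes, Op };

    fn takesExtraSource(tag: FixedTag) bool {
        return switch (tag[0]) {
            // AVX scalar forms are 3-operand; the rest here are 2-operand.
            .v_ss, .v_sd => true,
            else => false,
        };
    }
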
@@ -4799,18 +4799,18 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
src_mcv.getReg().?
else
try self.copyToTmpRegister(ty, src_mcv);
- try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, mat_src_reg.to128());
- try self.asmRegisterRegisterRegister(.{ ._, .vsqrtss }, dst_reg, dst_reg, dst_reg);
+ try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128());
+ try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg);
try self.asmRegisterRegisterImmediate(
- .{ ._, .vcvtps2ph },
+ .{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
);
break :result dst_mcv;
} else null,
- 32 => if (self.hasFeature(.avx)) .{ ._, .vsqrtss } else .{ ._, .sqrtss },
- 64 => if (self.hasFeature(.avx)) .{ ._, .vsqrtsd } else .{ ._, .sqrtsd },
+ 32 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
+ 64 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
80, 128 => null,
else => unreachable,
},
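
There is no hardware f16 square root, so the path above widens to f32, takes a scalar sqrt, and narrows back; the 0b1_00 immediate sets bit 2 of cvtps2ph's imm8, which defers rounding control to MXCSR. The same shape in Zig terms, as an illustrative sketch:

    const std = @import("std");

    // Widen, operate, narrow: the meaning of the emitted
    // vcvtph2ps / vsqrtss / vcvtps2ph sequence.
    fn sqrtF16ViaF32(x: f16) f16 {
        const wide = @as(f32, x); // vcvtph2ps
        return @floatCast(f16, @sqrt(wide)); // vsqrtss, then vcvtps2ph
    }

    test "f16 sqrt via f32" {
        try std.testing.expectEqual(@as(f16, 3.0), sqrtF16ViaF32(9.0));
    }
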
@@ -4819,7 +4819,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen()) {
1 => {
try self.asmRegisterRegister(
- .{ ._, .vcvtph2ps },
+ .{ .v_, .cvtph2ps },
dst_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -4827,13 +4827,13 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
try self.copyToTmpRegister(ty, src_mcv)).to128(),
);
try self.asmRegisterRegisterRegister(
- .{ ._, .vsqrtss },
+ .{ .v_ss, .sqrt },
dst_reg,
dst_reg,
dst_reg,
);
try self.asmRegisterRegisterImmediate(
- .{ ._, .vcvtps2ph },
+ .{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
@@ -4843,22 +4843,22 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
2...8 => {
const wide_reg = registerAlias(dst_reg, abi_size * 2);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
- .{ ._, .vcvtph2ps },
+ .{ .v_, .cvtph2ps },
wide_reg,
src_mcv.mem(Memory.PtrSize.fromSize(
@intCast(u32, @divExact(wide_reg.bitSize(), 16)),
)),
) else try self.asmRegisterRegister(
- .{ ._, .vcvtph2ps },
+ .{ .v_, .cvtph2ps },
wide_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(ty, src_mcv)).to128(),
);
- try self.asmRegisterRegister(.{ ._, .vsqrtps }, wide_reg, wide_reg);
+ try self.asmRegisterRegister(.{ .v_ps, .sqrt }, wide_reg, wide_reg);
try self.asmRegisterRegisterImmediate(
- .{ ._, .vcvtps2ph },
+ .{ .v_, .cvtps2ph },
dst_reg,
wide_reg,
Immediate.u(0b1_00),
@@ -4868,15 +4868,15 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
else => null,
} else null,
32 => switch (ty.vectorLen()) {
- 1 => if (self.hasFeature(.avx)) .{ ._, .vsqrtss } else .{ ._, .sqrtss },
- 2...4 => if (self.hasFeature(.avx)) .{ ._, .vsqrtps } else .{ ._, .sqrtps },
- 5...8 => if (self.hasFeature(.avx)) .{ ._, .vsqrtps } else null,
+ 1 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
+ 2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else .{ ._ps, .sqrt },
+ 5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else null,
else => null,
},
64 => switch (ty.vectorLen()) {
- 1 => if (self.hasFeature(.avx)) .{ ._, .vsqrtsd } else .{ ._, .sqrtsd },
- 2 => if (self.hasFeature(.avx)) .{ ._, .vsqrtpd } else .{ ._, .sqrtpd },
- 3...4 => if (self.hasFeature(.avx)) .{ ._, .vsqrtpd } else null,
+ 1 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
+ 2 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else .{ ._pd, .sqrt },
+ 3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else null,
else => null,
},
80, 128 => null,
@@ -4888,8 +4888,8 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
})) |tag| tag else return self.fail("TODO implement airSqrt for {}", .{
ty.fmt(self.bin_file.options.module.?),
});
- switch (mir_tag[1]) {
- .vsqrtss, .vsqrtsd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
+ switch (mir_tag[0]) {
+ .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
mir_tag,
dst_reg,
dst_reg,
@@ -6325,13 +6325,13 @@ fn genBinOp(
defer self.register_manager.unlockReg(tmp_lock);
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
- .{ ._, .vpinsrw },
+ .{ .vp_w, .insr },
dst_reg,
dst_reg,
src_mcv.mem(.word),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
- .{ ._, .vpunpcklwd },
+ .{ .vp_, .unpcklwd },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
@@ -6339,15 +6339,15 @@ fn genBinOp(
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
- try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg);
- try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp_reg, dst_reg);
+ try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
+ try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
- .add => .{ ._, .vaddss },
- .sub => .{ ._, .vsubss },
- .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivss },
- .max => .{ ._, .vmaxss },
- .min => .{ ._, .vmaxss },
+ .add => .{ .v_ss, .add },
+ .sub => .{ .v_ss, .sub },
+ .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
+ .max => .{ .v_ss, .max },
+ .min => .{ .v_ss, .max },
else => unreachable,
},
dst_reg,
@@ -6355,7 +6355,7 @@ fn genBinOp(
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
- .{ ._, .vcvtps2ph },
+ .{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
@@ -6363,29 +6363,29 @@ fn genBinOp(
return dst_mcv;
} else null,
32 => switch (air_tag) {
- .add => if (self.hasFeature(.avx)) .{ ._, .vaddss } else .{ ._, .addss },
- .sub => if (self.hasFeature(.avx)) .{ ._, .vsubss } else .{ ._, .subss },
- .mul => if (self.hasFeature(.avx)) .{ ._, .vmulss } else .{ ._, .mulss },
+ .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
+ .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
+ .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul },
.div_float,
.div_trunc,
.div_floor,
.div_exact,
- => if (self.hasFeature(.avx)) .{ ._, .vdivss } else .{ ._, .divss },
- .max => if (self.hasFeature(.avx)) .{ ._, .vmaxss } else .{ ._, .maxss },
- .min => if (self.hasFeature(.avx)) .{ ._, .vminss } else .{ ._, .minss },
+ => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
+ .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
+ .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
else => unreachable,
},
64 => switch (air_tag) {
- .add => if (self.hasFeature(.avx)) .{ ._, .vaddsd } else .{ ._, .addsd },
- .sub => if (self.hasFeature(.avx)) .{ ._, .vsubsd } else .{ ._, .subsd },
- .mul => if (self.hasFeature(.avx)) .{ ._, .vmulsd } else .{ ._, .mulsd },
+ .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
+ .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
+ .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul },
.div_float,
.div_trunc,
.div_floor,
.div_exact,
- => if (self.hasFeature(.avx)) .{ ._, .vdivsd } else .{ ._, .divsd },
- .max => if (self.hasFeature(.avx)) .{ ._, .vmaxsd } else .{ ._, .maxsd },
- .min => if (self.hasFeature(.avx)) .{ ._, .vminsd } else .{ ._, .minsd },
+ => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
+ .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
+ .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
else => unreachable,
},
80, 128 => null,
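
One payoff of the split shows in hunks like this: the operation now depends only on air_tag and the fixes only on the feature set, so the two choices could in principle be made independently. (Note, incidentally, that the f16 helper paths above map .min to the max operation in both the old and new spellings; the rename carries that quirk over verbatim.) A hypothetical factoring the new encoding would allow, not code from the commit (Mir.Inst.Tag and Mir.Inst.Fixes are assumed names; only Mir.Inst.FixedTag appears in this diff):

    // Pick the operation once, then let the feature test pick only the fixes.
    const op: Mir.Inst.Tag = switch (air_tag) {
        .add => .add,
        .sub => .sub,
        .mul => .mul,
        .div_float, .div_trunc, .div_floor, .div_exact => .div,
        .max => .max,
        .min => .min,
        else => unreachable,
    };
    const fixes: Mir.Inst.Fixes = if (self.hasFeature(.avx)) .v_ss else ._ss;
    const mir_tag: Mir.Inst.FixedTag = .{ fixes, op };
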
@@ -6401,13 +6401,13 @@ fn genBinOp(
defer self.register_manager.unlockReg(tmp_lock);
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
- .{ ._, .vpinsrw },
+ .{ .vp_w, .insr },
dst_reg,
dst_reg,
src_mcv.mem(.word),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
- .{ ._, .vpunpcklwd },
+ .{ .vp_, .unpcklwd },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
@@ -6415,15 +6415,15 @@ fn genBinOp(
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
- try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg);
- try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp_reg, dst_reg);
+ try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
+ try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
- .add => .{ ._, .vaddss },
- .sub => .{ ._, .vsubss },
- .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivss },
- .max => .{ ._, .vmaxss },
- .min => .{ ._, .vmaxss },
+ .add => .{ .v_ss, .add },
+ .sub => .{ .v_ss, .sub },
+ .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
+ .max => .{ .v_ss, .max },
+ .min => .{ .v_ss, .max },
else => unreachable,
},
dst_reg,
@@ -6431,7 +6431,7 @@ fn genBinOp(
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
- .{ ._, .vcvtps2ph },
+ .{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
@@ -6444,12 +6444,12 @@ fn genBinOp(
defer self.register_manager.unlockReg(tmp_lock);
if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
- .{ ._, .vpinsrd },
+ .{ .vp_d, .insr },
dst_reg,
src_mcv.mem(.dword),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
- .{ ._, .vunpcklps },
+ .{ .v_ps, .unpckl },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
@@ -6457,20 +6457,20 @@ fn genBinOp(
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
- try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg);
+ try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
try self.asmRegisterRegisterRegister(
- .{ ._, .vmovhlps },
+ .{ .v_ps, .movhl },
tmp_reg,
dst_reg,
dst_reg,
);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
- .add => .{ ._, .vaddps },
- .sub => .{ ._, .vsubps },
- .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps },
- .max => .{ ._, .vmaxps },
- .min => .{ ._, .vmaxps },
+ .add => .{ .v_ps, .add },
+ .sub => .{ .v_ps, .sub },
+ .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
+ .max => .{ .v_ps, .max },
+ .min => .{ .v_ps, .max },
else => unreachable,
},
dst_reg,
@@ -6478,7 +6478,7 @@ fn genBinOp(
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
- .{ ._, .vcvtps2ph },
+ .{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
@@ -6490,13 +6490,13 @@ fn genBinOp(
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
- try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg);
+ try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
- .{ ._, .vcvtph2ps },
+ .{ .v_, .cvtph2ps },
tmp_reg,
src_mcv.mem(.qword),
) else try self.asmRegisterRegister(
- .{ ._, .vcvtph2ps },
+ .{ .v_, .cvtph2ps },
tmp_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -6505,11 +6505,11 @@ fn genBinOp(
);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
- .add => .{ ._, .vaddps },
- .sub => .{ ._, .vsubps },
- .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps },
- .max => .{ ._, .vmaxps },
- .min => .{ ._, .vmaxps },
+ .add => .{ .v_ps, .add },
+ .sub => .{ .v_ps, .sub },
+ .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
+ .max => .{ .v_ps, .max },
+ .min => .{ .v_ps, .max },
else => unreachable,
},
dst_reg,
@@ -6517,7 +6517,7 @@ fn genBinOp(
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
- .{ ._, .vcvtps2ph },
+ .{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
@@ -6529,13 +6529,13 @@ fn genBinOp(
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
- try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg.to256(), dst_reg);
+ try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg.to256(), dst_reg);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
- .{ ._, .vcvtph2ps },
+ .{ .v_, .cvtph2ps },
tmp_reg,
src_mcv.mem(.xword),
) else try self.asmRegisterRegister(
- .{ ._, .vcvtph2ps },
+ .{ .v_, .cvtph2ps },
tmp_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -6544,11 +6544,11 @@ fn genBinOp(
);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
- .add => .{ ._, .vaddps },
- .sub => .{ ._, .vsubps },
- .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps },
- .max => .{ ._, .vmaxps },
- .min => .{ ._, .vmaxps },
+ .add => .{ .v_ps, .add },
+ .sub => .{ .v_ps, .sub },
+ .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
+ .max => .{ .v_ps, .max },
+ .min => .{ .v_ps, .max },
else => unreachable,
},
dst_reg.to256(),
@@ -6556,7 +6556,7 @@ fn genBinOp(
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
- .{ ._, .vcvtps2ph },
+ .{ .v_, .cvtps2ph },
dst_reg,
dst_reg.to256(),
Immediate.u(0b1_00),
@@ -6567,76 +6567,76 @@ fn genBinOp(
} else null,
32 => switch (lhs_ty.vectorLen()) {
1 => switch (air_tag) {
- .add => if (self.hasFeature(.avx)) .{ ._, .vaddss } else .{ ._, .addss },
- .sub => if (self.hasFeature(.avx)) .{ ._, .vsubss } else .{ ._, .subss },
- .mul => if (self.hasFeature(.avx)) .{ ._, .vmulss } else .{ ._, .mulss },
+ .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
+ .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
+ .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul },
.div_float,
.div_trunc,
.div_floor,
.div_exact,
- => if (self.hasFeature(.avx)) .{ ._, .vdivss } else .{ ._, .divss },
- .max => if (self.hasFeature(.avx)) .{ ._, .vmaxss } else .{ ._, .maxss },
- .min => if (self.hasFeature(.avx)) .{ ._, .vminss } else .{ ._, .minss },
+ => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
+ .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
+ .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
else => unreachable,
},
2...4 => switch (air_tag) {
- .add => if (self.hasFeature(.avx)) .{ ._, .vaddps } else .{ ._, .addps },
- .sub => if (self.hasFeature(.avx)) .{ ._, .vsubps } else .{ ._, .subps },
- .mul => if (self.hasFeature(.avx)) .{ ._, .vmulps } else .{ ._, .mulps },
+ .add => if (self.hasFeature(.avx)) .{ .v_ps, .add } else .{ ._ps, .add },
+ .sub => if (self.hasFeature(.avx)) .{ .v_ps, .sub } else .{ ._ps, .sub },
+ .mul => if (self.hasFeature(.avx)) .{ .v_ps, .mul } else .{ ._ps, .mul },
.div_float,
.div_trunc,
.div_floor,
.div_exact,
- => if (self.hasFeature(.avx)) .{ ._, .vdivps } else .{ ._, .divps },
- .max => if (self.hasFeature(.avx)) .{ ._, .vmaxps } else .{ ._, .maxps },
- .min => if (self.hasFeature(.avx)) .{ ._, .vminps } else .{ ._, .minps },
+ => if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div },
+ .max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max },
+ .min => if (self.hasFeature(.avx)) .{ .v_ps, .min } else .{ ._ps, .min },
else => unreachable,
},
5...8 => if (self.hasFeature(.avx)) switch (air_tag) {
- .add => .{ ._, .vaddps },
- .sub => .{ ._, .vsubps },
- .mul => .{ ._, .vmulps },
- .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps },
- .max => .{ ._, .vmaxps },
- .min => .{ ._, .vminps },
+ .add => .{ .v_ps, .add },
+ .sub => .{ .v_ps, .sub },
+ .mul => .{ .v_ps, .mul },
+ .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
+ .max => .{ .v_ps, .max },
+ .min => .{ .v_ps, .min },
else => unreachable,
} else null,
else => null,
},
64 => switch (lhs_ty.vectorLen()) {
1 => switch (air_tag) {
- .add => if (self.hasFeature(.avx)) .{ ._, .vaddsd } else .{ ._, .addsd },
- .sub => if (self.hasFeature(.avx)) .{ ._, .vsubsd } else .{ ._, .subsd },
- .mul => if (self.hasFeature(.avx)) .{ ._, .vmulsd } else .{ ._, .mulsd },
+ .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
+ .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
+ .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul },
.div_float,
.div_trunc,
.div_floor,
.div_exact,
- => if (self.hasFeature(.avx)) .{ ._, .vdivsd } else .{ ._, .divsd },
- .max => if (self.hasFeature(.avx)) .{ ._, .vmaxsd } else .{ ._, .maxsd },
- .min => if (self.hasFeature(.avx)) .{ ._, .vminsd } else .{ ._, .minsd },
+ => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
+ .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
+ .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
else => unreachable,
},
2 => switch (air_tag) {
- .add => if (self.hasFeature(.avx)) .{ ._, .vaddpd } else .{ ._, .addpd },
- .sub => if (self.hasFeature(.avx)) .{ ._, .vsubpd } else .{ ._, .subpd },
- .mul => if (self.hasFeature(.avx)) .{ ._, .vmulpd } else .{ ._, .mulpd },
+ .add => if (self.hasFeature(.avx)) .{ .v_pd, .add } else .{ ._pd, .add },
+ .sub => if (self.hasFeature(.avx)) .{ .v_pd, .sub } else .{ ._pd, .sub },
+ .mul => if (self.hasFeature(.avx)) .{ .v_pd, .mul } else .{ ._pd, .mul },
.div_float,
.div_trunc,
.div_floor,
.div_exact,
- => if (self.hasFeature(.avx)) .{ ._, .vdivpd } else .{ ._, .divpd },
- .max => if (self.hasFeature(.avx)) .{ ._, .vmaxpd } else .{ ._, .maxpd },
- .min => if (self.hasFeature(.avx)) .{ ._, .vminpd } else .{ ._, .minpd },
+ => if (self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div },
+ .max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max },
+ .min => if (self.hasFeature(.avx)) .{ .v_pd, .min } else .{ ._pd, .min },
else => unreachable,
},
3...4 => if (self.hasFeature(.avx)) switch (air_tag) {
- .add => .{ ._, .vaddpd },
- .sub => .{ ._, .vsubpd },
- .mul => .{ ._, .vmulpd },
- .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivpd },
- .max => .{ ._, .vmaxpd },
- .min => .{ ._, .vminpd },
+ .add => .{ .v_pd, .add },
+ .sub => .{ .v_pd, .sub },
+ .mul => .{ .v_pd, .mul },
+ .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div },
+ .max => .{ .v_pd, .max },
+ .min => .{ .v_pd, .min },
else => unreachable,
} else null,
else => null,
@@ -7563,13 +7563,13 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
defer self.register_manager.unlockReg(tmp2_lock);
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
- .{ ._, .vpinsrw },
+ .{ .vp_w, .insr },
tmp1_reg,
dst_reg.to128(),
src_mcv.mem(.word),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
- .{ ._, .vpunpcklwd },
+ .{ .vp_, .unpcklwd },
tmp1_reg,
dst_reg.to128(),
(if (src_mcv.isRegister())
@@ -7577,20 +7577,20 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
else
try self.copyToTmpRegister(ty, src_mcv)).to128(),
);
- try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, tmp1_reg, tmp1_reg);
- try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp2_reg, tmp1_reg);
- try self.genBinOpMir(.{ ._, .ucomiss }, ty, tmp1_mcv, tmp2_mcv);
+ try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, tmp1_reg, tmp1_reg);
+ try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg);
+ try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv);
} else return self.fail("TODO implement airCmp for {}", .{
ty.fmt(self.bin_file.options.module.?),
}),
32 => try self.genBinOpMir(
- .{ ._, .ucomiss },
+ .{ ._ss, .ucomi },
ty,
.{ .register = dst_reg },
src_mcv,
),
64 => try self.genBinOpMir(
- .{ ._, .ucomisd },
+ .{ ._sd, .ucomi },
ty,
.{ .register = dst_reg },
src_mcv,
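
In the f16 compare path above, both operands end up in one register (vpinsrw or vpunpcklwd), get widened into lanes 0 and 1 by vcvtph2ps, and then vmovshdup, which duplicates the odd lanes, drops operand two into lane 0 of a second register so a plain scalar ucomiss can compare the pair. The lane semantics, sketched with Zig vectors:

    const std = @import("std");

    // movshdup duplicates the odd lanes: { a, b, c, d } -> { b, b, d, d }.
    test "movshdup lane semantics" {
        const v: @Vector(4, f32) = .{ 1.0, 2.0, 3.0, 4.0 };
        const mask: @Vector(4, i32) = .{ 1, 1, 3, 3 };
        const dup = @shuffle(f32, v, undefined, mask);
        try std.testing.expectEqual(@as(f32, 2.0), dup[0]);
        try std.testing.expectEqual(@as(f32, 4.0), dup[2]);
    }
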
@@ -8573,42 +8573,42 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag {
else => return .{ ._, .mov },
.Float => switch (ty.floatBits(self.target.*)) {
16 => unreachable, // needs special handling
- 32 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss },
- 64 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd },
+ 32 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
+ 64 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
128 => return if (self.hasFeature(.avx))
- if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }
- else if (aligned) .{ ._, .movaps } else .{ ._, .movups },
+ if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
+ else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
else => {},
},
.Vector => switch (ty.childType().zigTypeTag()) {
.Float => switch (ty.childType().floatBits(self.target.*)) {
16 => switch (ty.vectorLen()) {
1 => unreachable, // needs special handling
- 2 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss },
- 3...4 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd },
+ 2 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
+ 3...4 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
5...8 => return if (self.hasFeature(.avx))
- if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }
- else if (aligned) .{ ._, .movaps } else .{ ._, .movups },
+ if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
+ else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
9...16 => if (self.hasFeature(.avx))
- return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups },
+ return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
else => {},
},
32 => switch (ty.vectorLen()) {
- 1 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss },
+ 1 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
2...4 => return if (self.hasFeature(.avx))
- if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }
- else if (aligned) .{ ._, .movaps } else .{ ._, .movups },
+ if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
+ else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
5...8 => if (self.hasFeature(.avx))
- return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups },
+ return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
else => {},
},
64 => switch (ty.vectorLen()) {
- 1 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd },
+ 1 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
2 => return if (self.hasFeature(.avx))
- if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }
- else if (aligned) .{ ._, .movaps } else .{ ._, .movups },
+ if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
+ else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
3...4 => if (self.hasFeature(.avx))
- return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups },
+ return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
else => {},
},
else => {},
@@ -8724,11 +8724,11 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
if ((dst_reg.class() == .floating_point) == (src_reg.class() == .floating_point))
switch (ty.zigTypeTag()) {
else => .{ ._, .mov },
- .Float, .Vector => .{ ._, .movaps },
+ .Float, .Vector => .{ ._ps, .mova },
}
else switch (abi_size) {
2 => return try self.asmRegisterRegisterImmediate(
- if (dst_reg.class() == .floating_point) .{ ._, .pinsrw } else .{ ._, .pextrw },
+ if (dst_reg.class() == .floating_point) .{ .p_w, .insr } else .{ .p_w, .extr },
registerAlias(dst_reg, 4),
registerAlias(src_reg, 4),
Immediate.u(0),
@@ -8761,7 +8761,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
});
if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
try self.asmRegisterMemoryImmediate(
- .{ ._, .pinsrw },
+ .{ .p_w, .insr },
registerAlias(dst_reg, abi_size),
src_mem,
Immediate.u(0),
@@ -8794,7 +8794,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
});
return if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
self.asmRegisterMemoryImmediate(
- .{ ._, .pinsrw },
+ .{ .p_w, .insr },
registerAlias(dst_reg, abi_size),
src_mem,
Immediate.u(0),
@@ -8838,7 +8838,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
});
if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
try self.asmRegisterMemoryImmediate(
- .{ ._, .pinsrw },
+ .{ .p_w, .insr },
registerAlias(dst_reg, abi_size),
src_mem,
Immediate.u(0),
@@ -8952,7 +8952,7 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal
);
if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
try self.asmMemoryRegisterImmediate(
- .{ ._, .pextrw },
+ .{ .p_w, .extr },
dst_mem,
src_reg.to128(),
Immediate.u(0),
@@ -9069,7 +9069,7 @@ fn genInlineMemcpyRegisterRegister(
try self.asmMemoryRegister(
switch (src_reg.class()) {
.general_purpose, .segment => .{ ._, .mov },
- .floating_point => .{ ._, .movss },
+ .floating_point => .{ ._ss, .mov },
},
Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = dst_reg, .disp = -offset }),
registerAlias(src_reg, abi_size),
@@ -10197,21 +10197,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
if (mem.eql(u2, &order, &.{ 1, 3, 2 }) or mem.eql(u2, &order, &.{ 3, 1, 2 }))
switch (ty.zigTypeTag()) {
.Float => switch (ty.floatBits(self.target.*)) {
- 32 => .{ ._, .vfmadd132ss },
- 64 => .{ ._, .vfmadd132sd },
+ 32 => .{ .v_ss, .fmadd132 },
+ 64 => .{ .v_sd, .fmadd132 },
16, 80, 128 => null,
else => unreachable,
},
.Vector => switch (ty.childType().zigTypeTag()) {
.Float => switch (ty.childType().floatBits(self.target.*)) {
32 => switch (ty.vectorLen()) {
- 1 => .{ ._, .vfmadd132ss },
- 2...8 => .{ ._, .vfmadd132ps },
+ 1 => .{ .v_ss, .fmadd132 },
+ 2...8 => .{ .v_ps, .fmadd132 },
else => null,
},
64 => switch (ty.vectorLen()) {
- 1 => .{ ._, .vfmadd132sd },
- 2...4 => .{ ._, .vfmadd132pd },
+ 1 => .{ .v_sd, .fmadd132 },
+ 2...4 => .{ .v_pd, .fmadd132 },
else => null,
},
16, 80, 128 => null,
@@ -10224,21 +10224,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
else if (mem.eql(u2, &order, &.{ 2, 1, 3 }) or mem.eql(u2, &order, &.{ 1, 2, 3 }))
switch (ty.zigTypeTag()) {
.Float => switch (ty.floatBits(self.target.*)) {
- 32 => .{ ._, .vfmadd213ss },
- 64 => .{ ._, .vfmadd213sd },
+ 32 => .{ .v_ss, .fmadd213 },
+ 64 => .{ .v_sd, .fmadd213 },
16, 80, 128 => null,
else => unreachable,
},
.Vector => switch (ty.childType().zigTypeTag()) {
.Float => switch (ty.childType().floatBits(self.target.*)) {
32 => switch (ty.vectorLen()) {
- 1 => .{ ._, .vfmadd213ss },
- 2...8 => .{ ._, .vfmadd213ps },
+ 1 => .{ .v_ss, .fmadd213 },
+ 2...8 => .{ .v_ps, .fmadd213 },
else => null,
},
64 => switch (ty.vectorLen()) {
- 1 => .{ ._, .vfmadd213sd },
- 2...4 => .{ ._, .vfmadd213pd },
+ 1 => .{ .v_sd, .fmadd213 },
+ 2...4 => .{ .v_pd, .fmadd213 },
else => null,
},
16, 80, 128 => null,
@@ -10251,21 +10251,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
else if (mem.eql(u2, &order, &.{ 2, 3, 1 }) or mem.eql(u2, &order, &.{ 3, 2, 1 }))
switch (ty.zigTypeTag()) {
.Float => switch (ty.floatBits(self.target.*)) {
- 32 => .{ ._, .vfmadd231ss },
- 64 => .{ ._, .vfmadd231sd },
+ 32 => .{ .v_ss, .fmadd231 },
+ 64 => .{ .v_sd, .fmadd231 },
16, 80, 128 => null,
else => unreachable,
},
.Vector => switch (ty.childType().zigTypeTag()) {
.Float => switch (ty.childType().floatBits(self.target.*)) {
32 => switch (ty.vectorLen()) {
- 1 => .{ ._, .vfmadd231ss },
- 2...8 => .{ ._, .vfmadd231ps },
+ 1 => .{ .v_ss, .fmadd231 },
+ 2...8 => .{ .v_ps, .fmadd231 },
else => null,
},
64 => switch (ty.vectorLen()) {
- 1 => .{ ._, .vfmadd231sd },
- 2...4 => .{ ._, .vfmadd231pd },
+ 1 => .{ .v_sd, .fmadd231 },
+ 2...4 => .{ .v_pd, .fmadd231 },
else => null,
},
16, 80, 128 => null,
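
The 132/213/231 suffixes kept on the fused multiply-add tags name operand roles (dst = 1, src2 = 2, src3 = 3): fmaddXYZ computes dst = opX * opY + opZ in a single rounding, which is why the code above selects a variant from the operand order permutation instead of shuffling registers. A quick cross-check against Zig's fused builtin:

    const std = @import("std");

    test "fma operand-order suffixes" {
        const op1: f64 = 2.0; // destination operand
        const op2: f64 = 3.0;
        const op3: f64 = 5.0;
        // fmaddXYZ: dst = opX * opY + opZ, fused like @mulAdd.
        try std.testing.expectEqual(op1 * op3 + op2, @mulAdd(f64, op1, op3, op2)); // fmadd132
        try std.testing.expectEqual(op2 * op1 + op3, @mulAdd(f64, op2, op1, op3)); // fmadd213
        try std.testing.expectEqual(op2 * op3 + op1, @mulAdd(f64, op2, op3, op1)); // fmadd231
    }
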
src/arch/x86_64/Mir.zig
@@ -278,8 +278,14 @@ pub const Inst = struct {
/// Add with carry
adc,
/// Add
+ /// Add packed single-precision floating-point values
+ /// Add scalar single-precision floating-point values
+ /// Add packed double-precision floating-point values
+ /// Add scalar double-precision floating-point values
add,
/// Logical and
+ /// Bitwise logical and of packed single-precision floating-point values
+ /// Bitwise logical and of packed double-precision floating-point values
@"and",
/// Bit scan forward
bsf,
@@ -304,6 +310,8 @@ pub const Inst = struct {
cmov,
/// Logical compare
/// Compare string
+ /// Compare scalar single-precision floating-point values
+ /// Compare scalar double-precision floating-point values
cmp,
/// Compare and exchange
/// Compare and exchange bytes
@@ -316,6 +324,10 @@ pub const Inst = struct {
cwde,
/// Unsigned division
/// Signed division
+ /// Divide packed single-precision floating-point values
+ /// Divide scalar single-precision floating-point values
+ /// Divide packed double-precision floating-point values
+ /// Divide scalar double-precision floating-point values
div,
///
int3,
@@ -339,6 +351,8 @@ pub const Inst = struct {
mfence,
/// Move
/// Move data from string to string
+ /// Move scalar single-precision floating-point value
+ /// Move scalar double-precision floating-point value
/// Move doubleword
/// Move quadword
mov,
@@ -350,6 +364,10 @@ pub const Inst = struct {
movzx,
/// Multiply
/// Signed multiplication
+ /// Multiply packed single-precision floating-point values
+ /// Multiply scalar single-precision floating-point values
+ /// Multiply packed double-precision floating-point values
+ /// Multiply scalar double-precision floating-point values
mul,
/// Two's complement negation
neg,
@@ -358,6 +376,8 @@ pub const Inst = struct {
/// One's complement negation
not,
/// Logical or
+ /// Bitwise logical or of packed single-precision floating-point values
+ /// Bitwise logical or of packed double-precision floating-point values
@"or",
/// Pop
pop,
@@ -390,6 +410,10 @@ pub const Inst = struct {
/// Double precision shift right
sh,
/// Subtract
+ /// Subtract packed single-precision floating-point values
+ /// Subtract scalar single-precision floating-point values
+ /// Subtract packed double-precision floating-point values
+ /// Subtract scalar double-precision floating-point values
sub,
/// Store string
sto,
@@ -406,145 +430,88 @@ pub const Inst = struct {
/// Exchange register/memory with register
xchg,
/// Logical exclusive-or
+ /// Bitwise logical xor of packed single-precision floating-point values
+ /// Bitwise logical xor of packed double-precision floating-point values
xor,
- /// Add packed single-precision floating-point values
- addps,
- /// Add scalar single-precision floating-point values
- addss,
- /// Bitwise logical and of packed single precision floating-point values
- andps,
- /// Bitwise logical and not of packed single precision floating-point values
- andnps,
- /// Compare scalar single-precision floating-point values
- cmpss,
+ /// Bitwise logical and not of packed single-precision floating-point values
+ /// Bitwise logical and not of packed double-precision floating-point values
+ andn,
/// Convert doubleword integer to scalar single-precision floating-point value
cvtsi2ss,
- /// Divide packed single-precision floating-point values
- divps,
- /// Divide scalar single-precision floating-point values
- divss,
/// Maximum of packed single-precision floating-point values
- maxps,
/// Maximum of scalar single-precision floating-point values
- maxss,
+ /// Maximum of packed double-precision floating-point values
+ /// Maximum of scalar double-precision floating-point values
+ max,
/// Minimum of packed single-precision floating-point values
- minps,
/// Minimum of scalar single-precision floating-point values
- minss,
+ /// Minimum of packed double-precision floating-point values
+ /// Minimum of scalar double-precision floating-point values
+ min,
/// Move aligned packed single-precision floating-point values
- movaps,
+ /// Move aligned packed double-precision floating-point values
+ mova,
/// Move packed single-precision floating-point values high to low
- movhlps,
- /// Move scalar single-precision floating-point value
- movss,
+ movhl,
/// Move unaligned packed single-precision floating-point values
- movups,
- /// Multiply packed single-precision floating-point values
- mulps,
- /// Multiply scalar single-precision floating-point values
- mulss,
- /// Bitwise logical or of packed single precision floating-point values
- orps,
+ /// Move unaligned packed double-precision floating-point values
+ movu,
+ /// Extract byte
/// Extract word
- pextrw,
+ /// Extract doubleword
+ /// Extract quadword
+ extr,
+ /// Insert byte
/// Insert word
- pinsrw,
+ /// Insert doubleword
+ /// Insert quadword
+ insr,
/// Square root of packed single-precision floating-point values
- sqrtps,
/// Square root of scalar single-precision floating-point value
- sqrtss,
- /// Subtract packed single-precision floating-point values
- subps,
- /// Subtract scalar single-precision floating-point values
- subss,
+ /// Square root of packed double-precision floating-point values
+ /// Square root of scalar double-precision floating-point value
+ sqrt,
/// Unordered compare scalar single-precision floating-point values
- ucomiss,
+ /// Unordered compare scalar double-precision floating-point values
+ ucomi,
/// Unpack and interleave high packed single-precision floating-point values
- unpckhps,
+ /// Unpack and interleave high packed double-precision floating-point values
+ unpckh,
/// Unpack and interleave low packed single-precision floating-point values
- unpcklps,
- /// Bitwise logical xor of packed single precision floating-point values
- xorps,
+ /// Unpack and interleave low packed double-precision floating-point values
+ unpckl,
- /// Add packed double-precision floating-point values
- addpd,
- /// Add scalar double-precision floating-point values
- addsd,
- /// Bitwise logical and not of packed double precision floating-point values
- andnpd,
- /// Bitwise logical and of packed double precision floating-point values
- andpd,
- /// Compare scalar double-precision floating-point values
- cmpsd,
/// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
cvtsd2ss,
/// Convert doubleword integer to scalar double-precision floating-point value
cvtsi2sd,
/// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
cvtss2sd,
- /// Divide packed double-precision floating-point values
- divpd,
- /// Divide scalar double-precision floating-point values
- divsd,
- /// Maximum of packed double-precision floating-point values
- maxpd,
- /// Maximum of scalar double-precision floating-point values
- maxsd,
- /// Minimum of packed double-precision floating-point values
- minpd,
- /// Minimum of scalar double-precision floating-point values
- minsd,
- /// Move scalar double-precision floating-point value
- movsd,
- /// Multiply packed double-precision floating-point values
- mulpd,
- /// Multiply scalar double-precision floating-point values
- mulsd,
- /// Bitwise logical or of packed double precision floating-point values
- orpd,
/// Shuffle packed high words
- pshufhw,
+ shufh,
/// Shuffle packed low words
- pshuflw,
+ shufl,
/// Shift packed data right logical
- psrld,
/// Shift packed data right logical
- psrlq,
/// Shift packed data right logical
- psrlw,
+ srl,
/// Unpack high data
- punpckhbw,
+ unpckhbw,
/// Unpack high data
- punpckhdq,
+ unpckhdq,
/// Unpack high data
- punpckhqdq,
+ unpckhqdq,
/// Unpack high data
- punpckhwd,
+ unpckhwd,
/// Unpack low data
- punpcklbw,
+ unpcklbw,
/// Unpack low data
- punpckldq,
+ unpckldq,
/// Unpack low data
- punpcklqdq,
+ unpcklqdq,
/// Unpack low data
- punpcklwd,
- /// Square root of double precision floating-point values
- sqrtpd,
- /// Square root of scalar double precision floating-point value
- sqrtsd,
- /// Subtract packed double-precision floating-point values
- subpd,
- /// Subtract scalar double-precision floating-point values
- subsd,
- /// Unordered compare scalar double-precision floating-point values
- ucomisd,
- /// Unpack and interleave high packed double-precision floating-point values
- unpckhpd,
- /// Unpack and interleave low packed double-precision floating-point values
- unpcklpd,
- /// Bitwise logical xor of packed double precision floating-point values
- xorpd,
+ unpcklwd,
/// Replicate double floating-point values
movddup,
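
Every fixes spelling used across the hunks follows one pattern: the characters before the underscore are a mnemonic prefix and the characters after it a suffix. A reconstruction of the variants this diff uses (illustrative; the actual Fixes declaration is not part of this commit's visible hunks):

    // Fixes seen in this diff, each with an example pairing and the
    // mnemonic it folds to (reconstruction for illustration only):
    const Fixes = enum {
        @"_", // .{ ._, .mov }        => mov
        _ss, //  .{ ._ss, .ucomi }    => ucomiss
        _sd, //  .{ ._sd, .ucomi }    => ucomisd
        _ps, //  .{ ._ps, .xor }      => xorps
        _pd, //  .{ ._pd, .round }    => roundpd
        v_, //   .{ .v_, .cvtph2ps }  => vcvtph2ps
        v_ss, // .{ .v_ss, .sqrt }    => vsqrtss
        v_sd, // .{ .v_sd, .sqrt }    => vsqrtsd
        v_ps, // .{ .v_ps, .movhl }   => vmovhlps
        v_pd, // .{ .v_pd, .mul }     => vmulpd
        p_w, //  .{ .p_w, .insr }     => pinsrw
        vp_, //  .{ .vp_, .unpcklwd } => vpunpcklwd
        vp_w, // .{ .vp_w, .insr }    => vpinsrw
        vp_d, // .{ .vp_d, .insr }    => vpinsrd
    };
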
@@ -553,199 +520,32 @@ pub const Inst = struct {
/// Replicate single floating-point values
movsldup,
- /// Extract Byte
- pextrb,
- /// Extract Doubleword
- pextrd,
- /// Extract Quadword
- pextrq,
- /// Insert Byte
- pinsrb,
- /// Insert Doubleword
- pinsrd,
- /// Insert Quadword
- pinsrq,
- /// Round packed double-precision floating-point values
- roundpd,
/// Round packed single-precision floating-point values
- roundps,
- /// Round scalar double-precision floating-point value
- roundsd,
/// Round scalar single-precision floating-point value
- roundss,
-
- /// Add packed double-precision floating-point values
- vaddpd,
- /// Add packed single-precision floating-point values
- vaddps,
- /// Add scalar double-precision floating-point values
- vaddsd,
- /// Add scalar single-precision floating-point values
- vaddss,
- /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
- vcvtsd2ss,
- /// Convert doubleword integer to scalar double-precision floating-point value
- vcvtsi2sd,
- /// Convert doubleword integer to scalar single-precision floating-point value
- vcvtsi2ss,
- /// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
- vcvtss2sd,
- /// Divide packed double-precision floating-point values
- vdivpd,
- /// Divide packed single-precision floating-point values
- vdivps,
- /// Divide scalar double-precision floating-point values
- vdivsd,
- /// Divide scalar single-precision floating-point values
- vdivss,
- /// Maximum of packed double-precision floating-point values
- vmaxpd,
- /// Maximum of packed single-precision floating-point values
- vmaxps,
- /// Maximum of scalar double-precision floating-point values
- vmaxsd,
- /// Maximum of scalar single-precision floating-point values
- vmaxss,
- /// Minimum of packed double-precision floating-point values
- vminpd,
- /// Minimum of packed single-precision floating-point values
- vminps,
- /// Minimum of scalar double-precision floating-point values
- vminsd,
- /// Minimum of scalar single-precision floating-point values
- vminss,
- /// Move aligned packed double-precision floating-point values
- vmovapd,
- /// Move aligned packed single-precision floating-point values
- vmovaps,
- /// Move packed single-precision floating-point values high to low
- vmovhlps,
- /// Replicate double floating-point values
- vmovddup,
- /// Move or merge scalar double-precision floating-point value
- vmovsd,
- /// Replicate single floating-point values
- vmovshdup,
- /// Replicate single floating-point values
- vmovsldup,
- /// Move or merge scalar single-precision floating-point value
- vmovss,
- /// Move unaligned packed double-precision floating-point values
- vmovupd,
- /// Move unaligned packed single-precision floating-point values
- vmovups,
- /// Multiply packed double-precision floating-point values
- vmulpd,
- /// Multiply packed single-precision floating-point values
- vmulps,
- /// Multiply scalar double-precision floating-point values
- vmulsd,
- /// Multiply scalar single-precision floating-point values
- vmulss,
- /// Extract Byte
- vpextrb,
- /// Extract Doubleword
- vpextrd,
- /// Extract Quadword
- vpextrq,
- /// Extract word
- vpextrw,
- /// Insert Byte
- vpinsrb,
- /// Insert Doubleword
- vpinsrd,
- /// Insert Quadword
- vpinsrq,
- /// Insert word
- vpinsrw,
- /// Shuffle packed high words
- vpshufhw,
- /// Shuffle packed low words
- vpshuflw,
- /// Shift packed data right logical
- vpsrld,
- /// Shift packed data right logical
- vpsrlq,
- /// Shift packed data right logical
- vpsrlw,
- /// Unpack high data
- vpunpckhbw,
- /// Unpack high data
- vpunpckhdq,
- /// Unpack high data
- vpunpckhqdq,
- /// Unpack high data
- vpunpckhwd,
- /// Unpack low data
- vpunpcklbw,
- /// Unpack low data
- vpunpckldq,
- /// Unpack low data
- vpunpcklqdq,
- /// Unpack low data
- vpunpcklwd,
/// Round packed double-precision floating-point values
- vroundpd,
- /// Round packed single-precision floating-point values
- vroundps,
/// Round scalar double-precision floating-point value
- vroundsd,
- /// Round scalar single-precision floating-point value
- vroundss,
- /// Square root of packed double-precision floating-point value
- vsqrtpd,
- /// Square root of packed single-precision floating-point value
- vsqrtps,
- /// Square root of scalar double-precision floating-point value
- vsqrtsd,
- /// Square root of scalar single-precision floating-point value
- vsqrtss,
- /// Subtract packed double-precision floating-point values
- vsubpd,
- /// Subtract packed single-precision floating-point values
- vsubps,
- /// Subtract scalar double-precision floating-point values
- vsubsd,
- /// Subtract scalar single-precision floating-point values
- vsubss,
- /// Unpack and interleave high packed double-precision floating-point values
- vunpckhpd,
- /// Unpack and interleave high packed single-precision floating-point values
- vunpckhps,
- /// Unpack and interleave low packed double-precision floating-point values
- vunpcklpd,
- /// Unpack and interleave low packed single-precision floating-point values
- vunpcklps,
+ round,
/// Convert 16-bit floating-point values to single-precision floating-point values
- vcvtph2ps,
+ cvtph2ps,
/// Convert single-precision floating-point values to 16-bit floating-point values
- vcvtps2ph,
+ cvtps2ph,
- /// Fused multiply-add of packed double-precision floating-point values
- vfmadd132pd,
- /// Fused multiply-add of packed double-precision floating-point values
- vfmadd213pd,
- /// Fused multiply-add of packed double-precision floating-point values
- vfmadd231pd,
- /// Fused multiply-add of packed single-precision floating-point values
- vfmadd132ps,
/// Fused multiply-add of packed single-precision floating-point values
- vfmadd213ps,
- /// Fused multiply-add of packed single-precision floating-point values
- vfmadd231ps,
- /// Fused multiply-add of scalar double-precision floating-point values
- vfmadd132sd,
- /// Fused multiply-add of scalar double-precision floating-point values
- vfmadd213sd,
- /// Fused multiply-add of scalar double-precision floating-point values
- vfmadd231sd,
/// Fused multiply-add of scalar single-precision floating-point values
- vfmadd132ss,
+ /// Fused multiply-add of packed double-precision floating-point values
+ /// Fused multiply-add of scalar double-precision floating-point values
+ fmadd132,
+ /// Fused multiply-add of packed single-precision floating-point values
/// Fused multiply-add of scalar single-precision floating-point values
- vfmadd213ss,
+ /// Fused multiply-add of packed double-precision floating-point values
+ /// Fused multiply-add of scalar double-precision floating-point values
+ fmadd213,
+ /// Fused multiply-add of packed single-precision floating-point values
/// Fused multiply-add of scalar single-precision floating-point values
- vfmadd231ss,
+ /// Fused multiply-add of packed double-precision floating-point values
+ /// Fused multiply-add of scalar double-precision floating-point values
+ fmadd231,
/// A pseudo instruction that requires special lowering.
/// This should be the only tag in this enum that doesn't