Commit c23e80e671
Changed files (5)
src
src/arch/x86_64/CodeGen.zig
@@ -8561,7 +8561,8 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag {
},
32 => switch (ty.vectorLen()) {
1 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
- 2...4 => return if (self.hasFeature(.avx))
+ 2 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
+ 3...4 => return if (self.hasFeature(.avx))
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
5...8 => if (self.hasFeature(.avx))
@@ -8577,6 +8578,14 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag {
return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
else => {},
},
+ 128 => switch (ty.vectorLen()) {
+ 1 => return if (self.hasFeature(.avx))
+ if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
+ else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
+ 2 => if (self.hasFeature(.avx))
+ return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
+ else => {},
+ },
else => {},
},
else => {},
@@ -9939,9 +9948,200 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void {
+ /// Lowers a splat of `ty_op.operand` into every element of the result vector.
+ /// Only float element types with the vector lengths handled below are implemented;
+ /// every other case reports a "TODO implement airSplat" failure.
fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
const ty_op = self.air.instructions.items(.data)[inst].ty_op;
- _ = ty_op;
- return self.fail("TODO implement airSplat for x86_64", .{});
- //return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+ const vector_ty = self.air.typeOfIndex(inst);
+ const dst_rc = regClassForType(vector_ty);
+ const scalar_ty = vector_ty.scalarType();
+
+ const src_mcv = try self.resolveInst(ty_op.operand);
+ // Each implemented case breaks out of this block with the result location;
+ // anything that reaches the end of the switch falls through to the TODO failure.
+ const result: MCValue = result: {
+ switch (scalar_ty.zigTypeTag()) {
+ else => {},
+ .Float => switch (scalar_ty.floatBits(self.target.*)) {
+ 32 => switch (vector_ty.vectorLen()) {
+ 1 => {
+ if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
+ const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
+ try self.genSetReg(dst_reg, scalar_ty, src_mcv);
+ break :result .{ .register = dst_reg };
+ },
+ 2...4 => {
+ if (self.hasFeature(.avx)) {
+ const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
+ if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ .{ .v_ss, .broadcast },
+ dst_reg.to128(),
+ src_mcv.mem(.dword),
+ ) else {
+ const src_reg = if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(scalar_ty, src_mcv);
+ try self.asmRegisterRegisterRegisterImmediate(
+ .{ .v_ps, .shuf },
+ dst_reg.to128(),
+ src_reg.to128(),
+ src_reg.to128(),
+ Immediate.u(0),
+ );
+ }
+ break :result .{ .register = dst_reg };
+ } else {
+ const dst_mcv = if (src_mcv.isRegister() and
+ self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
+ src_mcv
+ else
+ try self.copyToRegisterWithInstTracking(inst, scalar_ty, src_mcv);
+ const dst_reg = dst_mcv.getReg().?;
+ try self.asmRegisterRegisterImmediate(
+ .{ ._ps, .shuf },
+ dst_reg.to128(),
+ dst_reg.to128(),
+ Immediate.u(0),
+ );
+ break :result dst_mcv;
+ }
+ },
+ 5...8 => if (self.hasFeature(.avx)) {
+ const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
+ if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ .{ .v_ss, .broadcast },
+ dst_reg.to256(),
+ src_mcv.mem(.dword),
+ ) else {
+ const src_reg = if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(scalar_ty, src_mcv);
+ if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
+ .{ .v_ss, .broadcast },
+ dst_reg.to256(),
+ src_reg.to128(),
+ ) else {
+ try self.asmRegisterRegisterRegisterImmediate(
+ .{ .v_ps, .shuf },
+ dst_reg.to128(),
+ src_reg.to128(),
+ src_reg.to128(),
+ Immediate.u(0),
+ );
+ try self.asmRegisterRegisterRegisterImmediate(
+ .{ .v_f128, .insert },
+ dst_reg.to256(),
+ dst_reg.to256(),
+ dst_reg.to128(),
+ Immediate.u(1),
+ );
+ }
+ }
+ break :result .{ .register = dst_reg };
+ },
+ else => {},
+ },
+ 64 => switch (vector_ty.vectorLen()) {
+ 1 => {
+ if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
+ const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
+ try self.genSetReg(dst_reg, scalar_ty, src_mcv);
+ break :result .{ .register = dst_reg };
+ },
+ 2 => {
+ if (self.hasFeature(.sse3)) {
+ const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
+ if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
+ dst_reg.to128(),
+ src_mcv.mem(.qword),
+ ) else try self.asmRegisterRegister(
+ if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
+ dst_reg.to128(),
+ (if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(),
+ );
+ break :result .{ .register = dst_reg };
+ } else {
+ // Without SSE3 there is no movddup. movlhps only writes the high
+ // qword of its destination, so the scalar must already be in the
+ // low qword of the result register: place it there first (reusing
+ // the operand register when possible, as the non-AVX f32 path does),
+ // then copy the low qword into the high qword.
+ const dst_mcv = if (src_mcv.isRegister() and
+ self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
+ src_mcv
+ else
+ try self.copyToRegisterWithInstTracking(inst, scalar_ty, src_mcv);
+ const dst_reg = dst_mcv.getReg().?;
+ try self.asmRegisterRegister(
+ .{ ._ps, .movlh },
+ dst_reg.to128(),
+ dst_reg.to128(),
+ );
+ break :result dst_mcv;
+ }
+ },
+ 3...4 => if (self.hasFeature(.avx)) {
+ const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
+ if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ .{ .v_sd, .broadcast },
+ dst_reg.to256(),
+ src_mcv.mem(.qword),
+ ) else {
+ const src_reg = if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(scalar_ty, src_mcv);
+ if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
+ .{ .v_sd, .broadcast },
+ dst_reg.to256(),
+ src_reg.to128(),
+ ) else {
+ try self.asmRegisterRegister(
+ .{ .v_, .movddup },
+ dst_reg.to128(),
+ src_reg.to128(),
+ );
+ try self.asmRegisterRegisterRegisterImmediate(
+ .{ .v_f128, .insert },
+ dst_reg.to256(),
+ dst_reg.to256(),
+ dst_reg.to128(),
+ Immediate.u(1),
+ );
+ }
+ }
+ break :result .{ .register = dst_reg };
+ },
+ else => {},
+ },
+ 128 => switch (vector_ty.vectorLen()) {
+ 1 => {
+ if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
+ const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
+ try self.genSetReg(dst_reg, scalar_ty, src_mcv);
+ break :result .{ .register = dst_reg };
+ },
+ 2 => if (self.hasFeature(.avx)) {
+ const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
+ if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ .{ .v_f128, .broadcast },
+ dst_reg.to256(),
+ src_mcv.mem(.xword),
+ ) else {
+ const src_reg = if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(scalar_ty, src_mcv);
+ try self.asmRegisterRegisterRegisterImmediate(
+ .{ .v_f128, .insert },
+ dst_reg.to256(),
+ src_reg.to256(),
+ src_reg.to128(),
+ Immediate.u(1),
+ );
+ }
+ break :result .{ .register = dst_reg };
+ },
+ else => {},
+ },
+ 16, 80 => {},
+ else => unreachable,
+ },
+ }
+ return self.fail("TODO implement airSplat for {}", .{
+ vector_ty.fmt(self.bin_file.options.module.?),
+ });
+ };
+ return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
fn airSelect(self: *Self, inst: Air.Inst.Index) !void {
src/arch/x86_64/Encoding.zig
@@ -270,10 +270,12 @@ pub const Mnemonic = enum {
divps, divss,
maxps, maxss,
minps, minss,
- movaps, movhlps, movss, movups,
+ movaps, movhlps, movlhps,
+ movss, movups,
mulps, mulss,
orps,
pextrw, pinsrw,
+ shufps,
sqrtps, sqrtss,
subps, subss,
ucomiss,
@@ -296,6 +298,7 @@ pub const Mnemonic = enum {
psrld, psrlq, psrlw,
punpckhbw, punpckhdq, punpckhqdq, punpckhwd,
punpcklbw, punpckldq, punpcklqdq, punpcklwd,
+ shufpd,
sqrtpd, sqrtsd,
subpd, subsd,
ucomisd,
@@ -303,17 +306,22 @@ pub const Mnemonic = enum {
// SSE3
movddup, movshdup, movsldup,
// SSE4.1
+ extractps,
+ insertps,
pextrb, pextrd, pextrq,
pinsrb, pinsrd, pinsrq,
roundpd, roundps, roundsd, roundss,
// AVX
vaddpd, vaddps, vaddsd, vaddss,
+ vbroadcastf128, vbroadcastsd, vbroadcastss,
vcvtsd2ss, vcvtsi2sd, vcvtsi2ss, vcvtss2sd,
vdivpd, vdivps, vdivsd, vdivss,
+ vextractf128, vextractps,
+ vinsertf128, vinsertps,
vmaxpd, vmaxps, vmaxsd, vmaxss,
vminpd, vminps, vminsd, vminss,
vmovapd, vmovaps,
- vmovddup, vmovhlps,
+ vmovddup, vmovhlps, vmovlhps,
vmovsd,
vmovshdup, vmovsldup,
vmovss,
@@ -326,6 +334,7 @@ pub const Mnemonic = enum {
vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd,
vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd,
vroundpd, vroundps, vroundsd, vroundss,
+ vshufpd, vshufps,
vsqrtpd, vsqrtps, vsqrtsd, vsqrtss,
vsubpd, vsubps, vsubsd, vsubss,
// F16C
src/arch/x86_64/encodings.zig
@@ -867,6 +867,8 @@ pub const table = [_]Entry{
.{ .movhlps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .none, .sse },
+ .{ .movlhps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .none, .sse },
+
.{ .movss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .none, .sse },
.{ .movss, .mr, &.{ .xmm_m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .none, .sse },
@@ -879,14 +881,16 @@ pub const table = [_]Entry{
.{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .none, .sse },
- .{ .subps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .none, .sse },
-
- .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .none, .sse },
+ .{ .shufps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .none, .sse },
.{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse },
.{ .sqrtss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .none, .sse },
+ .{ .subps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .none, .sse },
+
+ .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .none, .sse },
+
.{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .none, .sse },
.{ .xorps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .none, .sse },
@@ -967,6 +971,8 @@ pub const table = [_]Entry{
.{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 },
.{ .punpcklqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .none, .sse2 },
+ .{ .shufpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .none, .sse2 },
+
.{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 },
.{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .none, .sse2 },
@@ -990,6 +996,10 @@ pub const table = [_]Entry{
.{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 },
// SSE4.1
+ .{ .extractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .none, .sse4_1 },
+
+ .{ .insertps, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .none, .sse4_1 },
+
.{ .pextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .none, .sse4_1 },
.{ .pextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .none, .sse4_1 },
.{ .pextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .long, .sse4_1 },
@@ -1019,6 +1029,11 @@ pub const table = [_]Entry{
.{ .vaddss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx },
+ .{ .vbroadcastss, .rm, &.{ .xmm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx },
+ .{ .vbroadcastss, .rm, &.{ .ymm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx },
+ .{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx },
+ .{ .vbroadcastf128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x1a }, 0, .vex_256_w0, .avx },
+
.{ .vcvtsd2ss, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx },
.{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx },
@@ -1039,6 +1054,14 @@ pub const table = [_]Entry{
.{ .vdivss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .vex_lig_wig, .avx },
+ .{ .vextractf128, .mri, &.{ .xmm_m128, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x19 }, 0, .vex_256_w0, .avx },
+
+ .{ .vextractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .vex_128_wig, .avx },
+
+ .{ .vinsertf128, .rvmi, &.{ .ymm, .ymm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x18 }, 0, .vex_256_w0, .avx },
+
+ .{ .vinsertps, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .vex_128_wig, .avx },
+
.{ .vmaxpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_128_wig, .avx },
.{ .vmaxpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_256_wig, .avx },
@@ -1074,6 +1097,8 @@ pub const table = [_]Entry{
.{ .vmovhlps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
+ .{ .vmovlhps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .vex_128_wig, .avx },
+
.{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
.{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
.{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx },
@@ -1150,6 +1175,12 @@ pub const table = [_]Entry{
.{ .vroundss, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .vex_lig_wig, .avx },
+ .{ .vshufpd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .vex_128_wig, .avx },
+ .{ .vshufpd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .vex_256_wig, .avx },
+
+ .{ .vshufps, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .vex_128_wig, .avx },
+ .{ .vshufps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .vex_256_wig, .avx },
+
.{ .vsqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_128_wig, .avx },
.{ .vsqrtpd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_256_wig, .avx },
@@ -1201,6 +1232,10 @@ pub const table = [_]Entry{
.{ .vfmadd231ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w0, .fma },
// AVX2
+ .{ .vbroadcastss, .rm, &.{ .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx2 },
+ .{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
+ .{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
+
.{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 },
.{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 },
.{ .vpsrld, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_256_wig, .avx2 },
src/arch/x86_64/Lower.zig
@@ -300,6 +300,8 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
else
.none,
}, mnemonic: {
+ @setEvalBranchQuota(2_000);
+
comptime var max_len = 0;
inline for (@typeInfo(Mnemonic).Enum.fields) |field| max_len = @max(field.name.len, max_len);
var buf: [max_len]u8 = undefined;
src/arch/x86_64/Mir.zig
@@ -256,6 +256,8 @@ pub const Inst = struct {
v_sd,
/// VEX-Encoded ___ Packed Double-Precision Values
v_pd,
+ /// VEX-Encoded ___ 128-Bits Of Floating-Point Data
+ v_f128,
/// Mask ___ Byte
k_b,
@@ -454,6 +456,8 @@ pub const Inst = struct {
mova,
/// Move packed single-precision floating-point values high to low
movhl,
+ /// Move packed single-precision floating-point values low to high
+ movlh,
/// Move unaligned packed single-precision floating-point values
/// Move unaligned packed double-precision floating-point values
movu,
@@ -488,6 +492,9 @@ pub const Inst = struct {
cvtsi2sd,
/// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
cvtss2sd,
+ /// Packed interleave shuffle of quadruplets of single-precision floating-point values
+ /// Packed interleave shuffle of pairs of double-precision floating-point values
+ shuf,
/// Shuffle packed high words
shufh,
/// Shuffle packed low words
@@ -520,12 +527,20 @@ pub const Inst = struct {
/// Replicate single floating-point values
movsldup,
+ /// Extract packed floating-point values
+ extract,
+ /// Insert scalar single-precision floating-point value
+ /// Insert packed floating-point values
+ insert,
/// Round packed single-precision floating-point values
/// Round scalar single-precision floating-point value
/// Round packed double-precision floating-point values
/// Round scalar double-precision floating-point value
round,
+ /// Load with broadcast floating-point data
+ broadcast,
+
/// Convert 16-bit floating-point values to single-precision floating-point values
cvtph2ps,
/// Convert single-precision floating-point values to 16-bit floating-point values