Commit 513c4c145e
Changed files (4)
src
arch
src/arch/x86_64/CodeGen.zig
@@ -3274,8 +3274,8 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void {
try self.genCopy(dst_ty, dst_mcv, src_mcv, .{});
break :dst dst_mcv;
} else dst: {
- const dst_mcv = try self.allocRegOrMem(inst, true);
- try self.genCopy(dst_ty, dst_mcv, src_mcv, .{});
+ const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, inst, true);
+ try self.genCopy(src_ty, dst_mcv, src_mcv, .{});
break :dst dst_mcv;
};
@@ -3333,22 +3333,40 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void {
else => .{ .register = try self.copyToTmpRegister(Type.usize, splat_mcv.address()) },
};
- const dst_reg = registerAlias(dst_mcv.getReg().?, src_abi_size);
+ const dst_reg = dst_mcv.getReg().?;
+ const dst_alias = registerAlias(dst_reg, src_abi_size);
if (self.hasFeature(.avx)) {
try self.asmRegisterRegisterMemory(
.{ .vp_, .@"and" },
- dst_reg,
- dst_reg,
+ dst_alias,
+ dst_alias,
try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)),
);
- try self.asmRegisterRegisterRegister(mir_tag, dst_reg, dst_reg, dst_reg);
+ if (src_abi_size > 16) {
+ const temp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse);
+ const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg);
+ defer self.register_manager.unlockReg(temp_lock);
+
+ try self.asmRegisterRegisterImmediate(
+ .{ if (self.hasFeature(.avx2)) .v_i128 else .v_f128, .extract },
+ registerAlias(temp_reg, dst_abi_size),
+ dst_alias,
+ Immediate.u(1),
+ );
+ try self.asmRegisterRegisterRegister(
+ mir_tag,
+ registerAlias(dst_reg, dst_abi_size),
+ registerAlias(dst_reg, dst_abi_size),
+ registerAlias(temp_reg, dst_abi_size),
+ );
+ } else try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, dst_alias);
} else {
try self.asmRegisterMemory(
.{ .p_, .@"and" },
- dst_reg,
+ dst_alias,
try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)),
);
- try self.asmRegisterRegister(mir_tag, dst_reg, dst_reg);
+ try self.asmRegisterRegister(mir_tag, dst_alias, dst_alias);
}
break :result dst_mcv;
}
@@ -16404,7 +16422,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
},
65...128 => switch (vector_len) {
else => null,
- 1...2 => .{ .vp_i128, .broadcast },
+ 1...2 => .{ .v_i128, .broadcast },
},
}) orelse break :avx2;
@@ -16418,7 +16436,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
registerAlias(dst_reg, @intCast(vector_ty.abiSize(mod))),
try src_mcv.mem(self, self.memSize(scalar_ty)),
) else {
- if (mir_tag[0] == .vp_i128) break :avx2;
+ if (mir_tag[0] == .v_i128) break :avx2;
try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{});
try self.asmRegisterRegister(
mir_tag,
src/arch/x86_64/Encoding.zig
@@ -415,7 +415,8 @@ pub const Mnemonic = enum {
vfmadd132sd, vfmadd213sd, vfmadd231sd,
vfmadd132ss, vfmadd213ss, vfmadd231ss,
// AVX2
- vpbroadcastb, vpbroadcastd, vpbroadcasti128, vpbroadcastq, vpbroadcastw,
+ vbroadcasti128, vpbroadcastb, vpbroadcastd, vpbroadcastq, vpbroadcastw,
+ vextracti128, vinserti128,
// zig fmt: on
};
src/arch/x86_64/encodings.zig
@@ -1769,6 +1769,10 @@ pub const table = [_]Entry{
.{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
.{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
+ .{ .vextracti128, .mri, &.{ .xmm_m128, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x39 }, 0, .vex_256_w0, .avx2 },
+
+ .{ .vinserti128, .rvmi, &.{ .ymm, .ymm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x38 }, 0, .vex_256_w0, .avx2 },
+
.{ .vpabsb, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .vex_256_wig, .avx2 },
.{ .vpabsd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .vex_256_wig, .avx2 },
.{ .vpabsw, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .vex_256_wig, .avx2 },
@@ -1809,7 +1813,7 @@ pub const table = [_]Entry{
.{ .vpbroadcastd, .rm, &.{ .ymm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x58 }, 0, .vex_256_w0, .avx2 },
.{ .vpbroadcastq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x59 }, 0, .vex_128_w0, .avx2 },
.{ .vpbroadcastq, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x59 }, 0, .vex_256_w0, .avx2 },
- .{ .vpbroadcasti128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x5a }, 0, .vex_256_w0, .avx2 },
+ .{ .vbroadcasti128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x5a }, 0, .vex_256_w0, .avx2 },
.{ .vpcmpeqb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x74 }, 0, .vex_256_wig, .avx2 },
.{ .vpcmpeqw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x75 }, 0, .vex_256_wig, .avx2 },
src/arch/x86_64/Mir.zig
@@ -230,6 +230,8 @@ pub const Inst = struct {
v_d,
/// VEX-Encoded ___ QuadWord
v_q,
+ /// VEX-Encoded ___ Integer Data
+ v_i128,
/// VEX-Encoded Packed ___
vp_,
/// VEX-Encoded Packed ___ Byte
@@ -242,8 +244,6 @@ pub const Inst = struct {
vp_q,
/// VEX-Encoded Packed ___ Double Quadword
vp_dq,
- /// VEX-Encoded Packed ___ Integer Data
- vp_i128,
/// VEX-Encoded ___ Scalar Single-Precision Values
v_ss,
/// VEX-Encoded ___ Packed Single-Precision Values
@@ -654,6 +654,7 @@ pub const Inst = struct {
/// Variable blend scalar double-precision floating-point values
blendv,
/// Extract packed floating-point values
+ /// Extract packed integer values
extract,
/// Insert scalar single-precision floating-point value
/// Insert packed floating-point values
@@ -696,6 +697,7 @@ pub const Inst = struct {
sha256rnds2,
/// Load with broadcast floating-point data
+ /// Load integer and broadcast
broadcast,
/// Convert 16-bit floating-point values to single-precision floating-point values