Commit b8f00ae337
Changed files (4):
  src/arch/x86_64/CodeGen.zig
  src/arch/x86_64/Encoding.zig
  src/arch/x86_64/encodings.zig
  test/behavior/abs.zig
src/arch/x86_64/CodeGen.zig
@@ -5385,46 +5385,104 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void {
const mod = self.bin_file.options.module.?;
const ty_op = self.air.instructions.items(.data)[inst].ty_op;
const ty = self.typeOf(ty_op.operand);
- const scalar_ty = ty.scalarType(mod);
- switch (scalar_ty.zigTypeTag(mod)) {
- .Int => if (ty.zigTypeTag(mod) == .Vector) {
- return self.fail("TODO implement airAbs for {}", .{ty.fmt(mod)});
- } else {
- if (ty.abiSize(mod) > 8) {
- return self.fail("TODO implement abs for integer abi sizes larger than 8", .{});
- }
- const src_mcv = try self.resolveInst(ty_op.operand);
- const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
+ const result: MCValue = result: {
+ const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(mod)) {
+ else => null,
+ .Int => {
+ if (ty.abiSize(mod) > 8) {
+ return self.fail("TODO implement abs for integer abi sizes larger than 8", .{});
+ }
+ const src_mcv = try self.resolveInst(ty_op.operand);
+ const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
- try self.genUnOpMir(.{ ._, .neg }, ty, dst_mcv);
+ try self.genUnOpMir(.{ ._, .neg }, ty, dst_mcv);
- const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(mod))), 2);
- switch (src_mcv) {
- .register => |val_reg| try self.asmCmovccRegisterRegister(
- registerAlias(dst_mcv.register, cmov_abi_size),
- registerAlias(val_reg, cmov_abi_size),
- .l,
- ),
- .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
- registerAlias(dst_mcv.register, cmov_abi_size),
- src_mcv.mem(Memory.PtrSize.fromSize(cmov_abi_size)),
- .l,
- ),
- else => {
- const val_reg = try self.copyToTmpRegister(ty, src_mcv);
- try self.asmCmovccRegisterRegister(
+ const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(mod))), 2);
+ switch (src_mcv) {
+ .register => |val_reg| try self.asmCmovccRegisterRegister(
registerAlias(dst_mcv.register, cmov_abi_size),
registerAlias(val_reg, cmov_abi_size),
.l,
- );
+ ),
+ .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
+ registerAlias(dst_mcv.register, cmov_abi_size),
+ src_mcv.mem(Memory.PtrSize.fromSize(cmov_abi_size)),
+ .l,
+ ),
+ else => {
+ const val_reg = try self.copyToTmpRegister(ty, src_mcv);
+ try self.asmCmovccRegisterRegister(
+ registerAlias(dst_mcv.register, cmov_abi_size),
+ registerAlias(val_reg, cmov_abi_size),
+ .l,
+ );
+ },
+ }
+ break :result dst_mcv;
+ },
+ .Float => return self.floatSign(inst, ty_op.operand, ty),
+ .Vector => switch (ty.childType(mod).zigTypeTag(mod)) {
+ else => null,
+ .Int => switch (ty.childType(mod).intInfo(mod).bits) {
+ else => null,
+ 8 => switch (ty.vectorLen(mod)) {
+ else => null,
+ 1...16 => if (self.hasFeature(.avx))
+ .{ .vp_b, .abs }
+ else if (self.hasFeature(.ssse3))
+ .{ .p_b, .abs }
+ else
+ null,
+ 17...32 => if (self.hasFeature(.avx2)) .{ .vp_b, .abs } else null,
+ },
+ 16 => switch (ty.vectorLen(mod)) {
+ else => null,
+ 1...8 => if (self.hasFeature(.avx))
+ .{ .vp_w, .abs }
+ else if (self.hasFeature(.ssse3))
+ .{ .p_w, .abs }
+ else
+ null,
+ 9...16 => if (self.hasFeature(.avx2)) .{ .vp_w, .abs } else null,
+ },
+ 32 => switch (ty.vectorLen(mod)) {
+ else => null,
+ 1...4 => if (self.hasFeature(.avx))
+ .{ .vp_d, .abs }
+ else if (self.hasFeature(.ssse3))
+ .{ .p_d, .abs }
+ else
+ null,
+ 5...8 => if (self.hasFeature(.avx2)) .{ .vp_d, .abs } else null,
+ },
},
- }
- return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
- },
- .Float => return self.floatSign(inst, ty_op.operand, ty),
- else => unreachable,
- }
+ .Float => return self.floatSign(inst, ty_op.operand, ty),
+ },
+ }) orelse return self.fail("TODO implement airAbs for {}", .{ty.fmt(mod)});
+
+ const abi_size: u32 = @intCast(ty.abiSize(mod));
+ const src_mcv = try self.resolveInst(ty_op.operand);
+ const dst_reg = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
+ src_mcv.getReg().?
+ else
+ try self.register_manager.allocReg(inst, self.regClassForType(ty));
+ const dst_alias = registerAlias(dst_reg, abi_size);
+ if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ mir_tag,
+ dst_alias,
+ src_mcv.mem(self.memPtrSize(ty)),
+ ) else try self.asmRegisterRegister(
+ mir_tag,
+ dst_alias,
+ registerAlias(if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(ty, src_mcv), abi_size),
+ );
+ break :result .{ .register = dst_reg };
+ };
+ return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
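For context: the rewritten airAbs keeps the neg + cmovl sequence for scalar integers up to 8 bytes and, instead of the old "TODO" failure, selects a single MIR instruction for small integer vectors. The Mir.Inst.FixedTag pairs such as .{ .p_b, .abs } and .{ .vp_b, .abs } appear to splice the operation name into the fix at the underscore, i.e. they select the pabsb/pabsw/pabsd and vpabsb/vpabsw/vpabsd mnemonics added in Encoding.zig below. A minimal sketch of code that exercises both paths (an illustration only, not part of this commit; it assumes an x86_64 target with at least SSSE3):

    const std = @import("std");

    test "integer @abs paths touched by this change" {
        // Scalar path: the operand is copied, negated, and cmovl restores the
        // original value when the negated copy came out negative.
        var a: i32 = -1234;
        _ = &a; // keep the value runtime-known so the backend is exercised
        try std.testing.expectEqual(@as(u32, 1234), @abs(a));

        // Vector path: 16 x i8 falls into the new 1...16 bucket, so the
        // backend can emit pabsb (SSSE3) or vpabsb (AVX) directly.
        var v: @Vector(16, i8) = @splat(-5);
        _ = &v;
        const expected: @Vector(16, u8) = @splat(5);
        try std.testing.expect(@reduce(.And, @abs(v) == expected));
    }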
src/arch/x86_64/Encoding.zig
@@ -316,6 +316,8 @@ pub const Mnemonic = enum {
xorpd,
// SSE3
movddup, movshdup, movsldup,
+ // SSSE3
+ pabsb, pabsd, pabsw,
// SSE4.1
blendpd, blendps, blendvpd, blendvps,
extractps,
@@ -353,6 +355,7 @@ pub const Mnemonic = enum {
vmovupd, vmovups,
vmulpd, vmulps, vmulsd, vmulss,
vorpd, vorps,
+ vpabsb, vpabsd, vpabsw,
vpackssdw, vpacksswb, vpackusdw, vpackuswb,
vpaddb, vpaddd, vpaddq, vpaddsb, vpaddsw, vpaddusb, vpaddusw, vpaddw,
vpand, vpandn,
@@ -750,6 +753,7 @@ pub const Feature = enum {
sse2,
sse3,
sse4_1,
+ ssse3,
x87,
};
src/arch/x86_64/encodings.zig
@@ -1108,6 +1108,14 @@ pub const table = [_]Entry{
.{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 },
+ // SSSE3
+ .{ .pabsb, .rm, &.{ .mm, .mm_m64 }, &.{ 0x0f, 0x38, 0x1c }, 0, .none, .ssse3 },
+ .{ .pabsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .none, .ssse3 },
+ .{ .pabsd, .rm, &.{ .mm, .mm_m64 }, &.{ 0x0f, 0x38, 0x1e }, 0, .none, .ssse3 },
+ .{ .pabsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .none, .ssse3 },
+ .{ .pabsw, .rm, &.{ .mm, .mm_m64 }, &.{ 0x0f, 0x38, 0x1d }, 0, .none, .ssse3 },
+ .{ .pabsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .none, .ssse3 },
+
// SSE4.1
.{ .blendpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .none, .sse4_1 },
@@ -1368,6 +1376,10 @@ pub const table = [_]Entry{
.{ .vorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .vex_128_wig, .avx },
.{ .vorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x56 }, 0, .vex_256_wig, .avx },
+ .{ .vpabsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .vex_128_wig, .avx },
+ .{ .vpabsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .vex_128_wig, .avx },
+ .{ .vpabsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .vex_128_wig, .avx },
+
.{ .vpacksswb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x63 }, 0, .vex_128_wig, .avx },
.{ .vpackssdw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6b }, 0, .vex_128_wig, .avx },
@@ -1537,6 +1549,10 @@ pub const table = [_]Entry{
.{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
.{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
+ .{ .vpabsb, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .vex_256_wig, .avx2 },
+ .{ .vpabsd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .vex_256_wig, .avx2 },
+ .{ .vpabsw, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .vex_256_wig, .avx2 },
+
.{ .vpacksswb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x63 }, 0, .vex_256_wig, .avx2 },
.{ .vpackssdw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6b }, 0, .vex_256_wig, .avx2 },
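The new rows match the documented encodings for these instructions: pabsb, pabsw and pabsd use opcodes 0F 38 1C, 0F 38 1D and 0F 38 1E respectively (with a 66 prefix for the XMM forms), and the vpabs* rows reuse the same opcode bytes under VEX.128 (AVX) and VEX.256 (AVX2). As a quick sanity check, not taken from the commit: pabsb xmm1, xmm2 assembles to 66 0F 38 1C CA, and vpabsb ymm1, ymm2 to C4 E2 7D 1C CA.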
test/behavior/abs.zig
@@ -280,7 +280,7 @@ test "@abs float vectors" {
try testAbsFloatVectors(f16, 16);
try comptime testAbsFloatVectors(f16, 17);
- try testAbsFloatVectors(f32, 17);
+ try testAbsFloatVectors(f32, 1);
try comptime testAbsFloatVectors(f32, 1);
try testAbsFloatVectors(f32, 2);
try comptime testAbsFloatVectors(f32, 2);
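testAbsFloatVectors itself is defined elsewhere in test/behavior/abs.zig and is not shown in this diff. For orientation, a hypothetical sketch of a helper with this shape (an assumption about its behavior, not the actual implementation): it builds a vector of the given length, applies @abs at runtime, and checks every lane.

    const std = @import("std");

    fn testAbsFloatVectors(comptime F: type, comptime len: comptime_int) !void {
        var v: @Vector(len, F) = @splat(-1.5);
        _ = &v; // force a runtime @abs so the backend, not comptime, is tested
        const abs_v: [len]F = @abs(v); // vectors coerce to arrays
        for (abs_v) |x| try std.testing.expectEqual(@as(F, 1.5), x);
    }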