Commit 9ccdbca635

Jacob Young <jacobly0@users.noreply.github.com>
2023-05-02 09:24:04
x86_64: implement fabs
1 parent 31429a4
Changed files (6)
src/arch/x86_64/CodeGen.zig
@@ -1458,14 +1458,13 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .log,
             .log2,
             .log10,
-            .fabs,
             .floor,
             .ceil,
             .round,
             .trunc_float,
             => try self.airUnaryMath(inst),
 
-            .neg => try self.airNeg(inst),
+            .neg, .fabs => try self.airFloatSign(inst),
 
             .add_with_overflow => try self.airAddSubWithOverflow(inst),
             .sub_with_overflow => try self.airAddSubWithOverflow(inst),
@@ -4185,7 +4184,7 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
 }
 
-fn airNeg(self: *Self, inst: Air.Inst.Index) !void {
+fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void {
     const un_op = self.air.instructions.items(.data)[inst].un_op;
     const ty = self.air.typeOf(un_op);
     const ty_bits = ty.floatBits(self.target.*);
@@ -4228,10 +4227,19 @@ fn airNeg(self: *Self, inst: Air.Inst.Index) !void {
     const dst_lock = self.register_manager.lockReg(dst_mcv.register);
     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
 
+    const tag = self.air.instructions.items(.tag)[inst];
     try self.genBinOpMir(switch (ty_bits) {
-        32 => .xorps,
-        64 => .xorpd,
-        else => return self.fail("TODO implement airNeg for {}", .{
+        32 => switch (tag) {
+            .neg => .xorps,
+            .fabs => .andnps,
+            else => unreachable,
+        },
+        64 => switch (tag) {
+            .neg => .xorpd,
+            .fabs => .andnpd,
+            else => unreachable,
+        },
+        else => return self.fail("TODO implement airFloatSign for {}", .{
             ty.fmt(self.bin_file.options.module.?),
         }),
     }, vec_ty, dst_mcv, sign_mcv);
src/arch/x86_64/Encoding.zig
@@ -268,23 +268,29 @@ pub const Mnemonic = enum {
     movd,
     // SSE
     addss,
+    andps,
+    andnps,
     cmpss,
     cvtsi2ss,
     divss,
     maxss, minss,
     movss,
     mulss,
+    orps,
     subss,
     ucomiss,
     xorps,
     // SSE2
     addsd,
+    andpd,
+    andnpd,
     //cmpsd,
     cvtsd2ss, cvtsi2sd, cvtss2sd,
     divsd,
     maxsd, minsd,
     movq, //movd, movsd,
     mulsd,
+    orpd,
     subsd,
     ucomisd,
     xorpd,
src/arch/x86_64/encodings.zig
@@ -832,6 +832,10 @@ pub const table = [_]Entry{
     // SSE
     .{ .addss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .sse },
 
+    .{ .andnps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .sse },
+
+    .{ .andps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .sse },
+
     .{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .sse },
 
     .{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .sse },
@@ -848,6 +852,8 @@ pub const table = [_]Entry{
 
     .{ .mulss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .sse },
 
+    .{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .sse },
+
     .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .sse },
 
     .{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .sse },
@@ -857,6 +863,10 @@ pub const table = [_]Entry{
     // SSE2
     .{ .addsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .sse2 },
 
+    .{ .andnpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .sse2 },
+
+    .{ .andpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .sse2 },
+
     .{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .sse2 },
 
     .{ .cvtsd2ss, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .sse2 },
@@ -883,6 +893,8 @@ pub const table = [_]Entry{
 
     .{ .mulsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .sse2 },
 
+    .{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .sse2 },
+
     .{ .subsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .sse2 },
 
     .{ .movsd, .rm, &.{ .xmm,     .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .sse2 },
src/arch/x86_64/Lower.zig
@@ -94,6 +94,8 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction {
         .xor,
 
         .addss,
+        .andnps,
+        .andps,
         .cmpss,
         .cvtsi2ss,
         .divss,
@@ -101,11 +103,14 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction {
         .minss,
         .movss,
         .mulss,
+        .orps,
         .roundss,
         .subss,
         .ucomiss,
         .xorps,
         .addsd,
+        .andnpd,
+        .andpd,
         .cmpsd,
         .cvtsd2ss,
         .cvtsi2sd,
@@ -115,6 +120,7 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction {
         .minsd,
         .movsd,
         .mulsd,
+        .orpd,
         .roundsd,
         .subsd,
         .ucomisd,
src/arch/x86_64/Mir.zig
@@ -168,6 +168,10 @@ pub const Inst = struct {
 
         /// Add single precision floating point values
         addss,
+        /// Bitwise logical and of packed single-precision floating-point values
+        andps,
+        /// Bitwise logical and not of packed single-precision floating-point values
+        andnps,
         /// Compare scalar single-precision floating-point values
         cmpss,
         /// Convert doubleword integer to scalar single-precision floating-point value
@@ -182,6 +186,8 @@ pub const Inst = struct {
         movss,
         /// Multiply scalar single-precision floating-point values
         mulss,
+        /// Bitwise logical or of packed single-precision floating-point values
+        orps,
         /// Round scalar single-precision floating-point values
         roundss,
         /// Subtract scalar single-precision floating-point values
@@ -192,6 +198,10 @@ pub const Inst = struct {
         xorps,
         /// Add double precision floating point values
         addsd,
+        /// Bitwise logical and not of packed double-precision floating-point values
+        andnpd,
+        /// Bitwise logical and of packed double-precision floating-point values
+        andpd,
         /// Compare scalar double-precision floating-point values
         cmpsd,
         /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
@@ -210,6 +220,8 @@ pub const Inst = struct {
         movsd,
         /// Multiply scalar double-precision floating-point values
         mulsd,
+        /// Bitwise logical or of packed double-precision floating-point values
+        orpd,
         /// Round scalar double-precision floating-point values
         roundsd,
         /// Subtract scalar double-precision floating-point values
test/behavior/floatop.zig
@@ -96,7 +96,8 @@ test "negative f128 floatToInt at compile-time" {
 }
 
 test "@sqrt" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64 and
+        comptime !std.Target.x86.featureSetHasAll(builtin.cpu.features, .{ .sse, .sse2, .sse4_1 })) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO