Commit cba195c117

Jacob Young <jacobly0@users.noreply.github.com>
2023-05-07 04:27:39
x86_64: implement some float and float vector movement
This allows actually storing values of the supported float and float vector types in registers instead of restricting them to stack slots.
1 parent 3a5e3c5
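
For context, a minimal sketch (not part of this commit) of the kind of Zig code that benefits: with this change the self-hosted x86_64 backend can keep float and float vector values like these in xmm/ymm registers instead of spilling every one of them to a stack slot.

    const std = @import("std");

    fn scale(v: @Vector(4, f32), s: f32) @Vector(4, f32) {
        return v * @splat(4, s);
    }

    test "float vector arithmetic" {
        const r = scale(.{ 1, 2, 3, 4 }, 2.0);
        try std.testing.expectEqual(@as(f32, 8), r[3]);
    }
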
src/arch/x86_64/CodeGen.zig
@@ -2008,6 +2008,11 @@ fn computeFrameLayout(self: *Self) !FrameLayout {
     };
 }
 
+fn getFrameAddrAlignment(self: *Self, frame_addr: FrameAddr) u32 {
+    const alloc_align = @as(u32, 1) << self.frame_allocs.get(@enumToInt(frame_addr.index)).abi_align;
+    return @min(alloc_align, @as(u32, 1) << @intCast(u5, @ctz(@bitCast(u32, frame_addr.off) | alloc_align)));
+}
+
 fn allocFrameIndex(self: *Self, alloc: FrameAlloc) !FrameIndex {
     const frame_allocs_slice = self.frame_allocs.slice();
     const frame_size = frame_allocs_slice.items(.abi_size);
@@ -2051,24 +2056,36 @@ fn allocTempRegOrMem(self: *Self, elem_ty: Type, reg_ok: bool) !MCValue {
     return self.allocRegOrMemAdvanced(elem_ty, null, reg_ok);
 }
 
-fn allocRegOrMemAdvanced(self: *Self, elem_ty: Type, inst: ?Air.Inst.Index, reg_ok: bool) !MCValue {
-    const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) orelse {
+fn allocRegOrMemAdvanced(self: *Self, ty: Type, inst: ?Air.Inst.Index, reg_ok: bool) !MCValue {
+    const abi_size = math.cast(u32, ty.abiSize(self.target.*)) orelse {
         const mod = self.bin_file.options.module.?;
-        return self.fail("type '{}' too big to fit into stack frame", .{elem_ty.fmt(mod)});
+        return self.fail("type '{}' too big to fit into stack frame", .{ty.fmt(mod)});
     };
 
-    if (reg_ok) {
-        // Make sure the type can fit in a register before we try to allocate one.
-        const ptr_bits = self.target.cpu.arch.ptrBitWidth();
-        const ptr_bytes: u64 = @divExact(ptr_bits, 8);
-        if (abi_size <= ptr_bytes) {
-            if (self.register_manager.tryAllocReg(inst, regClassForType(elem_ty))) |reg| {
+    if (reg_ok) need_mem: {
+        if (abi_size <= @as(u32, switch (ty.zigTypeTag()) {
+            .Float => switch (ty.floatBits(self.target.*)) {
+                16, 32, 64, 128 => 16,
+                80 => break :need_mem,
+                else => unreachable,
+            },
+            .Vector => switch (ty.childType().zigTypeTag()) {
+                .Float => switch (ty.childType().floatBits(self.target.*)) {
+                    16, 32, 64 => if (self.hasFeature(.avx)) 32 else 16,
+                    80, 128 => break :need_mem,
+                    else => unreachable,
+                },
+                else => break :need_mem,
+            },
+            else => 8,
+        })) {
+            if (self.register_manager.tryAllocReg(inst, regClassForType(ty))) |reg| {
                 return MCValue{ .register = registerAlias(reg, abi_size) };
             }
         }
     }
 
-    const frame_index = try self.allocFrameIndex(FrameAlloc.initType(elem_ty, self.target.*));
+    const frame_index = try self.allocFrameIndex(FrameAlloc.initType(ty, self.target.*));
     return .{ .load_frame = .{ .index = frame_index } };
 }
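
As a concrete instance of the size limits above: an 8-lane f32 vector occupies 32 bytes, so it can only be register-allocated (into a ymm register) when AVX is available; without AVX it keeps using a stack slot, and 80-bit floats always do. A small illustrative check:

    const std = @import("std");

    test "register size limit example" {
        // 32 bytes fits a ymm register (AVX only), not a 16-byte xmm register.
        try std.testing.expectEqual(@as(usize, 32), @sizeOf(@Vector(8, f32)));
    }
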
 
@@ -4442,12 +4459,19 @@ fn airRound(self: *Self, inst: Air.Inst.Index, mode: Immediate) !void {
         }),
     };
     assert(dst_mcv.isRegister());
+    const abi_size = @intCast(u32, ty.abiSize(self.target.*));
+    const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size);
     if (src_mcv.isRegister())
-        try self.asmRegisterRegisterImmediate(mir_tag, dst_mcv.getReg().?, src_mcv.getReg().?, mode)
+        try self.asmRegisterRegisterImmediate(
+            mir_tag,
+            dst_reg,
+            registerAlias(src_mcv.getReg().?, abi_size),
+            mode,
+        )
     else
         try self.asmRegisterMemoryImmediate(
             mir_tag,
-            dst_mcv.getReg().?,
+            dst_reg,
             src_mcv.mem(Memory.PtrSize.fromSize(@intCast(u32, ty.abiSize(self.target.*)))),
             mode,
         );
@@ -7847,19 +7871,43 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAirResult(inst, result);
 }
 
-fn movMirTag(self: *Self, ty: Type) !Mir.Inst.Tag {
-    return switch (ty.zigTypeTag()) {
-        else => .mov,
+fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.Tag {
+    switch (ty.zigTypeTag()) {
+        else => return .mov,
         .Float => switch (ty.floatBits(self.target.*)) {
             16 => unreachable, // needs special handling
-            32 => .movss,
-            64 => .movsd,
-            128 => .movaps,
-            else => return self.fail("TODO movMirTag from {}", .{
-                ty.fmt(self.bin_file.options.module.?),
-            }),
+            32 => return if (self.hasFeature(.avx)) .vmovss else .movss,
+            64 => return if (self.hasFeature(.avx)) .vmovsd else .movsd,
+            128 => return if (self.hasFeature(.avx))
+                if (aligned) .vmovaps else .vmovups
+            else if (aligned) .movaps else .movups,
+            else => {},
         },
-    };
+        .Vector => switch (ty.childType().zigTypeTag()) {
+            .Float => switch (ty.childType().floatBits(self.target.*)) {
+                16 => unreachable, // needs special handling
+                32 => switch (ty.vectorLen()) {
+                    1 => return if (self.hasFeature(.avx)) .vmovss else .movss,
+                    2...4 => return if (self.hasFeature(.avx))
+                        if (aligned) .vmovaps else .vmovups
+                    else if (aligned) .movaps else .movups,
+                    5...8 => if (self.hasFeature(.avx)) return if (aligned) .vmovaps else .vmovups,
+                    else => {},
+                },
+                64 => switch (ty.vectorLen()) {
+                    1 => return if (self.hasFeature(.avx)) .vmovsd else .movsd,
+                    2 => return if (self.hasFeature(.avx))
+                        if (aligned) .vmovaps else .vmovups
+                    else if (aligned) .movaps else .movups,
+                    3...4 => if (self.hasFeature(.avx)) return if (aligned) .vmovaps else .vmovups,
+                    else => {},
+                },
+                else => {},
+            },
+            else => {},
+        },
+    }
+    return self.fail("TODO movMirTag for {}", .{ty.fmt(self.bin_file.options.module.?)});
 }
 
 fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void {
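
To make the selection above concrete, here is a standalone sketch (names are illustrative, not the backend's API) of how the chosen move mnemonic follows from the float width, AVX availability, and whether the memory operand is known to be suitably aligned:

    const std = @import("std");

    fn pickScalarOrPackedMov(float_bits: u16, has_avx: bool, aligned: bool) []const u8 {
        return switch (float_bits) {
            32 => if (has_avx) "vmovss" else "movss",
            64 => if (has_avx) "vmovsd" else "movsd",
            128 => if (has_avx)
                (if (aligned) "vmovaps" else "vmovups")
            else if (aligned) "movaps" else "movups",
            else => "TODO", // f16 and f80 need special handling
        };
    }

    test "move mnemonic selection sketch" {
        try std.testing.expectEqualStrings("movsd", pickScalarOrPackedMov(64, false, true));
        try std.testing.expectEqualStrings("vmovups", pickScalarOrPackedMov(128, true, false));
    }
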
@@ -8016,7 +8064,11 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
                             0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }),
                             else => .lea,
                         },
-                        .indirect, .load_frame => try self.movMirTag(ty),
+                        .indirect => try self.movMirTag(ty, false),
+                        .load_frame => |frame_addr| try self.movMirTag(
+                            ty,
+                            self.getFrameAddrAlignment(frame_addr) >= ty.abiAlignment(self.target.*),
+                        ),
                         .lea_frame => .lea,
                         else => unreachable,
                     },
@@ -8040,7 +8092,11 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
                         )
                     else
                         self.asmRegisterMemory(
-                            try self.movMirTag(ty),
+                            try self.movMirTag(ty, mem.isAlignedGeneric(
+                                u32,
+                                @bitCast(u32, small_addr),
+                                ty.abiAlignment(self.target.*),
+                            )),
                             registerAlias(dst_reg, abi_size),
                             src_mem,
                         );
@@ -8080,7 +8136,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
                 )
             else
                 try self.asmRegisterMemory(
-                    try self.movMirTag(ty),
+                    try self.movMirTag(ty, false),
                     registerAlias(dst_reg, abi_size),
                     src_mem,
                 );
@@ -8194,7 +8250,24 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal
                 )
             else
                 try self.asmMemoryRegister(
-                    try self.movMirTag(ty),
+                    try self.movMirTag(ty, switch (base) {
+                        .none => mem.isAlignedGeneric(
+                            u32,
+                            @bitCast(u32, disp),
+                            ty.abiAlignment(self.target.*),
+                        ),
+                        .reg => |reg| switch (reg) {
+                            .es, .cs, .ss, .ds => mem.isAlignedGeneric(
+                                u32,
+                                @bitCast(u32, disp),
+                                ty.abiAlignment(self.target.*),
+                            ),
+                            else => false,
+                        },
+                        .frame => |frame_index| self.getFrameAddrAlignment(
+                            .{ .index = frame_index, .off = disp },
+                        ) >= ty.abiAlignment(self.target.*),
+                    }),
                     dst_mem,
                     registerAlias(src_reg, abi_size),
                 );
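
The aligned flag matters because the aligned packed moves (movaps/vmovaps) fault with #GP when the memory operand is not 16- or 32-byte aligned, while the unaligned forms (movups/vmovups) accept any address. A minimal sketch of the kind of check being made here, with made-up values:

    const std = @import("std");

    test "alignment decision sketch" {
        // A 16-byte value at displacement 8 from a 16-byte-aligned base is only
        // 8-byte aligned, so the unaligned form must be selected.
        const abi_align: u32 = 16;
        const disp: i32 = 8;
        try std.testing.expect(!std.mem.isAlignedGeneric(u32, @bitCast(u32, disp), abi_align));
    }
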
@@ -8415,7 +8488,7 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void {
         defer if (operand_lock) |lock| self.register_manager.unlockReg(lock);
 
         const dest = try self.allocRegOrMem(inst, true);
-        try self.genCopy(self.air.typeOfIndex(inst), dest, operand);
+        try self.genCopy(if (!dest.isMemory() or operand.isMemory()) dst_ty else src_ty, dest, operand);
         break :result dest;
     };
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
src/arch/x86_64/encoder.zig
@@ -228,7 +228,7 @@ pub const Instruction = struct {
             .td => try encoder.imm64(inst.ops[0].mem.moffs.offset),
             else => {
                 const mem_op = switch (data.op_en) {
-                    .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0],
+                    .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0],
                     .rm, .rmi, .vmi => inst.ops[1],
                     .rvm, .rvmi => inst.ops[2],
                     else => unreachable,
@@ -239,6 +239,7 @@ pub const Instruction = struct {
                             .m, .mi, .m1, .mc, .vmi => enc.modRmExt(),
                             .mr, .mri, .mrc => inst.ops[1].reg.lowEnc(),
                             .rm, .rmi, .rvm, .rvmi => inst.ops[0].reg.lowEnc(),
+                            .mvr => inst.ops[2].reg.lowEnc(),
                             else => unreachable,
                         };
                         try encoder.modRm_direct(rm, reg.lowEnc());
@@ -248,6 +249,7 @@ pub const Instruction = struct {
                             .m, .mi, .m1, .mc, .vmi => .none,
                             .mr, .mri, .mrc => inst.ops[1],
                             .rm, .rmi, .rvm, .rvmi => inst.ops[0],
+                            .mvr => inst.ops[2],
                             else => unreachable,
                         };
                         try encodeMemory(enc, mem, op, encoder);
@@ -315,7 +317,7 @@ pub const Instruction = struct {
                 }
             else
                 null,
-            .vmi, .rvm, .rvmi => unreachable,
+            .vmi, .rvm, .rvmi, .mvr => unreachable,
         };
         if (segment_override) |seg| {
             legacy.setSegmentOverride(seg);
@@ -350,7 +352,7 @@ pub const Instruction = struct {
                 rex.b = b_x_op.isBaseExtended();
                 rex.x = b_x_op.isIndexExtended();
             },
-            .vmi, .rvm, .rvmi => unreachable,
+            .vmi, .rvm, .rvmi, .mvr => unreachable,
         }
 
         try encoder.rex(rex);
@@ -372,10 +374,11 @@ pub const Instruction = struct {
         switch (op_en) {
             .np, .i, .zi, .fd, .td, .d => {},
             .o, .oi => vex.b = inst.ops[0].reg.isExtended(),
-            .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .vmi, .rvm, .rvmi => {
+            .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .vmi, .rvm, .rvmi, .mvr => {
                 const r_op = switch (op_en) {
                     .rm, .rmi, .rvm, .rvmi => inst.ops[0],
                     .mr, .mri, .mrc => inst.ops[1],
+                    .mvr => inst.ops[2],
                     .m, .mi, .m1, .mc, .vmi => .none,
                     else => unreachable,
                 };
@@ -383,7 +386,7 @@ pub const Instruction = struct {
 
                 const b_x_op = switch (op_en) {
                     .rm, .rmi, .vmi => inst.ops[1],
-                    .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0],
+                    .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0],
                     .rvm, .rvmi => inst.ops[2],
                     else => unreachable,
                 };
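
For orientation on the new .mvr operand encoding used by the vmovss/vmovsd register forms of opcode 0x11: per the Intel SDM, vmovss xmm1, xmm2, xmm3 places the destination in ModRM.rm, the merge source in VEX.vvvv, and the data source in ModRM.reg, which is why ops[0] supplies the r/m operand and ops[2] the reg field above. An illustrative sketch of that routing (hypothetical helper, not the encoder's API):

    const std = @import("std");

    const MvrField = enum { modrm_rm, vex_vvvv, modrm_reg };

    /// ops[i] -> encoding field for the .mvr operand order (m, v, r).
    fn mvrOperandIndex(field: MvrField) usize {
        return switch (field) {
            .modrm_rm => 0,
            .vex_vvvv => 1,
            .modrm_reg => 2,
        };
    }

    test "mvr operand routing sketch" {
        try std.testing.expectEqual(@as(usize, 2), mvrOperandIndex(.modrm_reg));
    }
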
src/arch/x86_64/Encoding.zig
@@ -206,7 +206,7 @@ pub fn format(
             try writer.print("+{s} ", .{tag});
         },
         .m, .mi, .m1, .mc, .vmi => try writer.print("/{d} ", .{encoding.modRmExt()}),
-        .mr, .rm, .rmi, .mri, .mrc, .rvm, .rvmi => try writer.writeAll("/r "),
+        .mr, .rm, .rmi, .mri, .mrc, .rvm, .rvmi, .mvr => try writer.writeAll("/r "),
     }
 
     switch (encoding.data.op_en) {
@@ -230,7 +230,7 @@ pub fn format(
             };
             try writer.print("{s} ", .{tag});
         },
-        .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rvm => {},
+        .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rvm, .mvr => {},
     }
 
     try writer.print("{s} ", .{@tagName(encoding.mnemonic)});
@@ -332,7 +332,12 @@ pub const Mnemonic = enum {
     // SSE4.1
     roundsd, roundss,
     // AVX
-    vmovddup, vmovshdup, vmovsldup,
+    vmovapd, vmovaps,
+    vmovddup,
+    vmovsd,
+    vmovshdup, vmovsldup,
+    vmovss,
+    vmovupd, vmovups,
     vpextrw, vpinsrw,
     vpshufhw, vpshuflw,
     vpsrld, vpsrlq, vpsrlw,
@@ -357,7 +362,7 @@ pub const OpEn = enum {
     fd, td,
     m1, mc, mi, mr, rm,
     rmi, mri, mrc,
-    vmi, rvm, rvmi,
+    vmi, rvm, rvmi, mvr,
     // zig fmt: on
 };
 
@@ -549,9 +554,10 @@ pub const Op = enum {
         return switch (op) {
             .rm8, .rm16, .rm32, .rm64,
             .r32_m16, .r64_m16,
-            .m8, .m16, .m32, .m64, .m80, .m128,
+            .m8, .m16, .m32, .m64, .m80, .m128, .m256,
             .m,
-            .xmm_m32, .xmm_m64, .xmm_m128, .ymm_m256,
+            .xmm_m32, .xmm_m64, .xmm_m128,
+            .ymm_m256,
             =>  true,
             else => false,
         };
src/arch/x86_64/encodings.zig
@@ -974,12 +974,42 @@ pub const table = [_]Entry{
     .{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .none, .sse4_1 },
 
     // AVX
+    .{ .vmovapd, .rm, &.{ .xmm,      .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_128, .avx },
+    .{ .vmovapd, .mr, &.{ .xmm_m128, .xmm      }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_128, .avx },
+    .{ .vmovapd, .rm, &.{ .ymm,      .ymm_m256 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_256, .avx },
+    .{ .vmovapd, .mr, &.{ .ymm_m256, .ymm      }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_256, .avx },
+
+    .{ .vmovaps, .rm, &.{ .xmm,      .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .vex_128, .avx },
+    .{ .vmovaps, .mr, &.{ .xmm_m128, .xmm      }, &.{ 0x0f, 0x29 }, 0, .vex_128, .avx },
+    .{ .vmovaps, .rm, &.{ .ymm,      .ymm_m256 }, &.{ 0x0f, 0x28 }, 0, .vex_256, .avx },
+    .{ .vmovaps, .mr, &.{ .ymm_m256, .ymm      }, &.{ 0x0f, 0x29 }, 0, .vex_256, .avx },
+
     .{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128, .avx },
 
+    .{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_128, .avx },
+    .{ .vmovsd, .rm,  &.{       .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_128, .avx },
+    .{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_128, .avx },
+    .{ .vmovsd, .mr,  &.{       .m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_128, .avx },
+
     .{ .vmovshdup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .vex_128, .avx },
 
     .{ .vmovsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .vex_128, .avx },
 
+    .{ .vmovss, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_128, .avx },
+    .{ .vmovss, .rm,  &.{       .xmm, .m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_128, .avx },
+    .{ .vmovss, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_128, .avx },
+    .{ .vmovss, .mr,  &.{       .m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_128, .avx },
+
+    .{ .vmovupd, .rm, &.{ .xmm,      .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_128, .avx },
+    .{ .vmovupd, .mr, &.{ .xmm_m128, .xmm      }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_128, .avx },
+    .{ .vmovupd, .rm, &.{ .ymm,      .ymm_m256 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_256, .avx },
+    .{ .vmovupd, .mr, &.{ .ymm_m256, .ymm      }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_256, .avx },
+
+    .{ .vmovups, .rm, &.{ .xmm,      .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .vex_128, .avx },
+    .{ .vmovups, .mr, &.{ .xmm_m128, .xmm      }, &.{ 0x0f, 0x11 }, 0, .vex_128, .avx },
+    .{ .vmovups, .rm, &.{ .ymm,      .ymm_m256 }, &.{ 0x0f, 0x10 }, 0, .vex_256, .avx },
+    .{ .vmovups, .mr, &.{ .ymm_m256, .ymm      }, &.{ 0x0f, 0x11 }, 0, .vex_256, .avx },
+
     .{ .vpextrw, .mri, &.{ .r32,     .xmm, .imm8 }, &.{ 0x66, 0x0f,       0x15 }, 0, .vex_128,      .avx },
     .{ .vpextrw, .mri, &.{ .r64,     .xmm, .imm8 }, &.{ 0x66, 0x0f,       0x15 }, 0, .vex_128_long, .avx },
     .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128,      .avx },
src/arch/x86_64/Lower.zig
@@ -184,9 +184,15 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
         .roundsd,
         .roundss,
 
+        .vmovapd,
+        .vmovaps,
         .vmovddup,
+        .vmovsd,
         .vmovshdup,
         .vmovsldup,
+        .vmovss,
+        .vmovupd,
+        .vmovups,
         .vpextrw,
         .vpinsrw,
         .vpshufhw,
src/arch/x86_64/Mir.zig
@@ -282,12 +282,24 @@ pub const Inst = struct {
         /// Round scalar single-precision floating-point values
         roundss,
 
+        /// Move aligned packed double-precision floating-point values
+        vmovapd,
+        /// Move aligned packed single-precision floating-point values
+        vmovaps,
         /// Replicate double floating-point values
         vmovddup,
+        /// Move or merge scalar double-precision floating-point value
+        vmovsd,
         /// Replicate single floating-point values
         vmovshdup,
         /// Replicate single floating-point values
         vmovsldup,
+        /// Move or merge scalar single-precision floating-point value
+        vmovss,
+        /// Move unaligned packed double-precision floating-point values
+        vmovupd,
+        /// Move unaligned packed single-precision floating-point values
+        vmovups,
         /// Extract word
         vpextrw,
         /// Insert word
test/behavior/math.zig
@@ -399,7 +399,8 @@ fn testBinaryNot128(comptime Type: type, x: Type) !void {
 
 test "division" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64 and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
test/behavior/muladd.zig
@@ -2,9 +2,11 @@ const std = @import("std");
 const builtin = @import("builtin");
 const expect = std.testing.expect;
 
+const stage2_x86_64_without_hardware_fma_support = builtin.zig_backend == .stage2_x86_64 and
+    !std.Target.x86.featureSetHas(builtin.cpu.features, .fma);
+
 test "@mulAdd" {
-    if (builtin.zig_backend == .stage2_x86_64 and
-        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .fma)) return error.SkipZigTest; // TODO
+    if (stage2_x86_64_without_hardware_fma_support) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -118,7 +120,7 @@ fn vector32() !void {
 
 test "vector f32" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (stage2_x86_64_without_hardware_fma_support) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -141,7 +143,7 @@ fn vector64() !void {
 
 test "vector f64" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (stage2_x86_64_without_hardware_fma_support) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO