Commit 5a2c547fc1

David Rubin <daviru007@icloud.com>
2024-07-02 11:41:14
riscv: vectors part 3
1 parent 09e9812
lib/std/start.zig
@@ -221,7 +221,26 @@ fn riscv_start() callconv(.C) noreturn {
             }
             break :ret root.main();
         },
-        else => @compileError("expected return type of main to be 'void', 'noreturn', 'u8'"),
+        .ErrorUnion => ret: {
+            const result = root.main() catch {
+                const stderr = std.io.getStdErr().writer();
+                stderr.writeAll("failed with error\n") catch {
+                    @panic("failed to print when main returned error");
+                };
+                break :ret 1;
+            };
+            switch (@typeInfo(@TypeOf(result))) {
+                .Void => break :ret 0,
+                .Int => |info| {
+                    if (info.bits != 8 or info.signedness == .signed) {
+                        @compileError(bad_main_ret);
+                    }
+                    return result;
+                },
+                else => @compileError(bad_main_ret),
+            }
+        },
+        else => @compileError(bad_main_ret),
     });
 }
 
src/arch/riscv64/bits.zig
@@ -41,7 +41,7 @@ pub const Memory = struct {
                 2...2 => .hword,
                 3...4 => .word,
                 5...8 => .dword,
-                else => unreachable,
+                else => std.debug.panic("fromByteSize {}", .{size}),
             };
         }
 
@@ -221,7 +221,7 @@ pub const Register = enum(u8) {
             // zig fmt: off
             @intFromEnum(Register.zero) ... @intFromEnum(Register.x31) => 64,
             @intFromEnum(Register.ft0)  ... @intFromEnum(Register.f31) => if (Target.riscv.featureSetHas(features, .d)) 64 else 32,
-            @intFromEnum(Register.v0)   ... @intFromEnum(Register.v31) => 1024, // TODO: look at suggestVectorSize
+            @intFromEnum(Register.v0)   ... @intFromEnum(Register.v31) => 256, // TODO: look at suggestVectorSize
             else => unreachable,
             // zig fmt: on
         };
src/arch/riscv64/CodeGen.zig
@@ -88,8 +88,9 @@ exitlude_jump_relocs: std.ArrayListUnmanaged(usize) = .{},
 /// across each runtime branch upon joining.
 branch_stack: *std.ArrayList(Branch),
 
-// The current bit length of vector registers.
-vec_len: u32,
+// Currently set vector properties; null means they haven't been set yet in this function.
+avl: ?u64,
+vtype: ?bits.VType,
 
 // Key is the block instruction
 blocks: std.AutoHashMapUnmanaged(Air.Inst.Index, BlockData) = .{},
@@ -751,7 +752,8 @@ pub fn generate(
         .end_di_line = func.rbrace_line,
         .end_di_column = func.rbrace_column,
         .scope_generation = 0,
-        .vec_len = 16 * 8, // TODO: set this per cpu
+        .avl = null,
+        .vtype = null,
     };
     defer {
         function.frame_allocs.deinit(gpa);
@@ -1064,8 +1066,17 @@ fn getCsr(func: *Func, csr: CSR) !Register {
     return dst_reg;
 }
 
-fn setVl(func: *Func, dst_reg: Register, avl: u5, options: bits.VType) !void {
+fn setVl(func: *Func, dst_reg: Register, avl: u64, options: bits.VType) !void {
+    if (func.avl == avl) if (func.vtype) |vtype| {
+        // it's already set, we don't need to do anything
+        if (@as(u8, @bitCast(vtype)) == @as(u8, @bitCast(options))) return;
+    };
+
+    func.avl = avl;
+    func.vtype = options;
+
     if (avl == 0) {
+        // the caller means to do "vsetvli zero, zero ..." which keeps the avl at whatever it was before
         const options_int: u12 = @as(u12, 0) | @as(u8, @bitCast(options));
         _ = try func.addInst(.{
             .tag = .vsetvli,
@@ -1077,18 +1088,33 @@ fn setVl(func: *Func, dst_reg: Register, avl: u5, options: bits.VType) !void {
             } },
         });
     } else {
-        const options_int: u12 = (~@as(u12, 0) << 10) | @as(u8, @bitCast(options));
-        _ = try func.addInst(.{
-            .tag = .vsetivli,
-            .ops = .rri,
-            .data = .{
-                .i_type = .{
+        // if the avl fits into a u5 we can use vsetivli; otherwise we load it into a register and use vsetvli
+        if (avl <= std.math.maxInt(u5)) {
+            const options_int: u12 = (~@as(u12, 0) << 10) | @as(u8, @bitCast(options));
+            _ = try func.addInst(.{
+                .tag = .vsetivli,
+                .ops = .rri,
+                .data = .{
+                    .i_type = .{
+                        .rd = dst_reg,
+                        .rs1 = @enumFromInt(avl),
+                        .imm12 = Immediate.u(options_int),
+                    },
+                },
+            });
+        } else {
+            const options_int: u12 = @as(u12, 0) | @as(u8, @bitCast(options));
+            const temp_reg = try func.copyToTmpRegister(Type.usize, .{ .immediate = avl });
+            _ = try func.addInst(.{
+                .tag = .vsetvli,
+                .ops = .rri,
+                .data = .{ .i_type = .{
                     .rd = dst_reg,
-                    .rs1 = @enumFromInt(avl),
+                    .rs1 = temp_reg,
                     .imm12 = Immediate.u(options_int),
-                },
-            },
-        });
+                } },
+            });
+        }
     }
 }
 
@@ -1939,7 +1965,7 @@ fn allocRegOrMem(func: *Func, elem_ty: Type, inst: ?Air.Inst.Index, reg_ok: bool
     const bit_size = elem_ty.bitSize(pt);
     const min_size: u64 = switch (elem_ty.zigTypeTag(pt.zcu)) {
         .Float => if (func.hasFeature(.d)) 64 else 32,
-        .Vector => func.vec_len,
+        .Vector => 256, // TODO: calculate it from avl * vsew
         else => 64,
     };
 
@@ -2293,7 +2319,11 @@ fn binOp(
         return func.fail("binOp libcall runtime-float ops", .{});
     }
 
-    if (lhs_ty.bitSize(pt) > 64) return func.fail("TODO: binOp >= 64 bits", .{});
+    // we don't yet support non-vector operands wider than 64 bits here
+    switch (lhs_ty.zigTypeTag(pt.zcu)) {
+        .Vector => {}, // works differently and fails in a different place
+        else => if (lhs_ty.bitSize(pt) > 64) return func.fail("TODO: binOp >= 64 bits", .{}),
+    }
 
     const lhs_mcv = try func.resolveInst(lhs_air);
     const rhs_mcv = try func.resolveInst(rhs_air);
@@ -2442,17 +2472,25 @@ fn genBinOp(
                     });
                 },
                 .Vector => {
+                    const num_elem = lhs_ty.vectorLen(zcu);
+                    const elem_size = lhs_ty.childType(zcu).bitSize(pt);
+
+                    const child_ty = lhs_ty.childType(zcu);
+
                     const mir_tag: Mir.Inst.Tag = switch (tag) {
-                        .add => .vaddvv,
-                        .sub => .vsubvv,
+                        .add => switch (child_ty.zigTypeTag(zcu)) {
+                            .Int => .vaddvv,
+                            .Float => .vfaddvv,
+                            else => unreachable,
+                        },
+                        .sub => switch (child_ty.zigTypeTag(zcu)) {
+                            .Int => .vsubvv,
+                            .Float => .vfsubvv,
+                            else => unreachable,
+                        },
                         else => return func.fail("TODO: genBinOp {s} Vector", .{@tagName(tag)}),
                     };
 
-                    const num_elem: u5 = math.cast(u5, lhs_ty.vectorLen(zcu)) orelse {
-                        return func.fail("TODO: genBinOp use vsetvli for larger avl sizes", .{});
-                    };
-                    const elem_size = lhs_ty.childType(zcu).bitSize(pt);
-
                     try func.setVl(.zero, num_elem, .{
                         .vsew = switch (elem_size) {
                             8 => .@"8",
@@ -2761,78 +2799,55 @@ fn airAddWithOverflow(func: *Func, inst: Air.Inst.Index) !void {
     const extra = func.air.extraData(Air.Bin, ty_pl.payload).data;
 
     const result: MCValue = if (func.liveness.isUnused(inst)) .unreach else result: {
-        const lhs_ty = func.typeOf(extra.lhs);
-
-        const int_info = lhs_ty.intInfo(zcu);
-
-        const tuple_ty = func.typeOfIndex(inst);
-        const result_mcv = try func.allocRegOrMem(tuple_ty, inst, false);
-        const offset = result_mcv.load_frame;
+        const ty = func.typeOf(extra.lhs);
+        switch (ty.zigTypeTag(zcu)) {
+            .Vector => return func.fail("TODO implement add with overflow for Vector type", .{}),
+            .Int => {
+                const int_info = ty.intInfo(zcu);
 
-        if (int_info.bits >= 8 and math.isPowerOfTwo(int_info.bits)) {
-            const add_result = try func.binOp(null, .add, extra.lhs, extra.rhs);
-            const add_result_reg = try func.copyToTmpRegister(lhs_ty, add_result);
-            const add_result_reg_lock = func.register_manager.lockRegAssumeUnused(add_result_reg);
-            defer func.register_manager.unlockReg(add_result_reg_lock);
+                const tuple_ty = func.typeOfIndex(inst);
+                const result_mcv = try func.allocRegOrMem(tuple_ty, inst, false);
+                const offset = result_mcv.load_frame;
 
-            const shift_amount: u6 = @intCast(Type.usize.bitSize(pt) - int_info.bits);
+                if (int_info.bits >= 8 and math.isPowerOfTwo(int_info.bits)) {
+                    const add_result = try func.binOp(null, .add, extra.lhs, extra.rhs);
 
-            const shift_reg, const shift_lock = try func.allocReg(.int);
-            defer func.register_manager.unlockReg(shift_lock);
+                    const add_result_reg = try func.copyToTmpRegister(ty, add_result);
+                    const add_result_reg_lock = func.register_manager.lockRegAssumeUnused(add_result_reg);
+                    defer func.register_manager.unlockReg(add_result_reg_lock);
 
-            _ = try func.addInst(.{
-                .tag = .slli,
-                .ops = .rri,
-                .data = .{
-                    .i_type = .{
-                        .rd = shift_reg,
-                        .rs1 = add_result_reg,
-                        .imm12 = Immediate.u(shift_amount),
-                    },
-                },
-            });
-
-            _ = try func.addInst(.{
-                .tag = if (int_info.signedness == .unsigned) .srli else .srai,
-                .ops = .rri,
-                .data = .{
-                    .i_type = .{
-                        .rd = shift_reg,
-                        .rs1 = shift_reg,
-                        .imm12 = Immediate.u(shift_amount),
-                    },
-                },
-            });
-
-            try func.genSetMem(
-                .{ .frame = offset.index },
-                offset.off + @as(i32, @intCast(tuple_ty.structFieldOffset(0, pt))),
-                lhs_ty,
-                add_result,
-            );
+                    try func.genSetMem(
+                        .{ .frame = offset.index },
+                        offset.off + @as(i32, @intCast(tuple_ty.structFieldOffset(0, pt))),
+                        ty,
+                        add_result,
+                    );
 
-            const overflow_reg, const overflow_lock = try func.allocReg(.int);
-            defer func.register_manager.unlockReg(overflow_lock);
+                    const overflow_reg, const overflow_lock = try func.allocReg(.int);
+                    defer func.register_manager.unlockReg(overflow_lock);
 
-            try func.genBinOp(
-                .cmp_neq,
-                .{ .register = shift_reg },
-                lhs_ty,
-                .{ .register = add_result_reg },
-                lhs_ty,
-                overflow_reg,
-            );
+                    try func.genBinOp(
+                        .cmp_neq,
+                        .{ .register = add_result_reg },
+                        ty,
+                        .{ .register = add_result_reg },
+                        ty,
+                        overflow_reg,
+                    );
 
-            try func.genSetMem(
-                .{ .frame = offset.index },
-                offset.off + @as(i32, @intCast(tuple_ty.structFieldOffset(1, pt))),
-                Type.u1,
-                .{ .register = overflow_reg },
-            );
+                    try func.genSetMem(
+                        .{ .frame = offset.index },
+                        offset.off + @as(i32, @intCast(tuple_ty.structFieldOffset(1, pt))),
+                        Type.u1,
+                        .{ .register = overflow_reg },
+                    );
 
-            break :result result_mcv;
-        } else {
-            return func.fail("TODO: less than 8 bit or non-pow 2 addition", .{});
+                    break :result result_mcv;
+                } else {
+                    return func.fail("TODO: less than 8 bit or non-pow 2 addition", .{});
+                }
+            },
+            else => unreachable,
         }
     };
 
@@ -5519,8 +5534,6 @@ fn airAsm(func: *Func, inst: Air.Inst.Index) !void {
     const inputs: []const Air.Inst.Ref = @ptrCast(func.air.extra[extra_i..][0..extra.data.inputs_len]);
     extra_i += inputs.len;
 
-    log.debug("airAsm input: {any}", .{inputs});
-
     const dead = !is_volatile and func.liveness.isUnused(inst);
     const result: MCValue = if (dead) .unreach else result: {
         if (outputs.len > 1) {
@@ -5897,7 +5910,7 @@ fn genSetReg(func: *Func, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
     const max_size: u32 = switch (reg.class()) {
         .int => 64,
         .float => if (func.hasFeature(.d)) 64 else 32,
-        .vector => func.vec_len,
+        .vector => 64, // TODO: calculate it from avl * vsew
     };
     if (abi_size > max_size) return std.debug.panic("tried to set reg with size {}", .{abi_size});
     const dst_reg_class = reg.class();
@@ -6033,6 +6046,8 @@ fn genSetReg(func: *Func, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
         .register_pair => return func.fail("genSetReg should we allow reg -> reg_pair?", .{}),
         .load_frame => |frame| {
             if (reg.class() == .vector) {
+                // vectors don't support an offset memory load, so we need to put the true
+                // address into a register before loading from it.
                 const addr_reg, const addr_lock = try func.allocReg(.int);
                 defer func.register_manager.unlockReg(addr_lock);
 
@@ -6073,28 +6088,30 @@ fn genSetReg(func: *Func, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
             _ = try func.addInst(.{
                 .tag = .pseudo,
                 .ops = .pseudo_lea_rm,
-                .data = .{ .rm = .{
-                    .r = reg,
-                    .m = switch (src_mcv) {
-                        .register_offset => |reg_off| .{
-                            .base = .{ .reg = reg_off.reg },
-                            .mod = .{
-                                .size = func.memSize(ty),
-                                .disp = reg_off.off,
-                                .unsigned = false,
+                .data = .{
+                    .rm = .{
+                        .r = reg,
+                        .m = switch (src_mcv) {
+                            .register_offset => |reg_off| .{
+                                .base = .{ .reg = reg_off.reg },
+                                .mod = .{
+                                    .size = .byte, // the size doesn't matter
+                                    .disp = reg_off.off,
+                                    .unsigned = false,
+                                },
                             },
-                        },
-                        .lea_frame => |frame| .{
-                            .base = .{ .frame = frame.index },
-                            .mod = .{
-                                .size = func.memSize(ty),
-                                .disp = frame.off,
-                                .unsigned = false,
+                            .lea_frame => |frame| .{
+                                .base = .{ .frame = frame.index },
+                                .mod = .{
+                                    .size = .byte, // the size doesn't matter
+                                    .disp = frame.off,
+                                    .unsigned = false,
+                                },
                             },
+                            else => unreachable,
                         },
-                        else => unreachable,
                     },
-                } },
+                },
             });
         },
         .indirect => |reg_off| {
@@ -6119,9 +6136,7 @@ fn genSetReg(func: *Func, ty: Type, reg: Register, src_mcv: MCValue) InnerError!
                     // There is no vector instruction for loading with an offset to a base register,
                     // so we need to get an offset register containing the address of the vector first
                     // and load from it.
-                    const len: u5 = math.cast(u5, ty.vectorLen(zcu)) orelse {
-                        return func.fail("TODO: genSetReg load_frame -> vec reg, vector length doesn't fit into imm avl", .{});
-                    };
+                    const len = ty.vectorLen(zcu);
                     const elem_ty = ty.childType(zcu);
                     const elem_size = elem_ty.abiSize(pt);
 
@@ -6202,6 +6217,8 @@ fn genSetMem(
     src_mcv: MCValue,
 ) InnerError!void {
     const pt = func.pt;
+    const zcu = pt.zcu;
+
     const abi_size: u32 = @intCast(ty.abiSize(pt));
     const dst_ptr_mcv: MCValue = switch (base) {
         .reg => |base_reg| .{ .register_offset = .{ .reg = base_reg, .off = disp } },
@@ -6252,10 +6269,8 @@ fn genSetMem(
             if (reg.class() == .vector) {
                 const addr_reg = try func.copyToTmpRegister(Type.usize, dst_ptr_mcv);
 
-                const num_elem: u5 = math.cast(u5, ty.vectorLen(pt.zcu)) orelse {
-                    return func.fail("TODO: genBinOp use vsetvli for larger avl sizes", .{});
-                };
-                const elem_size = ty.childType(pt.zcu).bitSize(pt);
+                const num_elem = ty.vectorLen(zcu);
+                const elem_size = ty.childType(zcu).bitSize(pt);
 
                 try func.setVl(.zero, num_elem, .{
                     .vsew = switch (elem_size) {
@@ -6279,7 +6294,7 @@ fn genSetMem(
                             .base = .{ .reg = addr_reg },
                             .mod = .{
                                 .disp = 0,
-                                .size = func.memSize(ty.childType(pt.zcu)),
+                                .size = func.memSize(ty.childType(zcu)),
                                 .unsigned = false,
                             },
                         },
src/arch/riscv64/Encoding.zig
@@ -285,6 +285,9 @@ pub const Mnemonic = enum {
     vaddvv,
     vsubvv,
 
+    vfaddvv,
+    vfsubvv,
+
     vadcvv,
 
     vmvvx,
@@ -316,6 +319,8 @@ pub const Mnemonic = enum {
     amomaxud,
     amominud,
 
+    // TODO: Q extension
+
     pub fn encoding(mnem: Mnemonic) Enc {
         return switch (mnem) {
             // zig fmt: off
@@ -542,6 +547,9 @@ pub const Mnemonic = enum {
             .vaddvv         => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b000000, .funct3 = .OPIVV } } },
             .vsubvv         => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b000010, .funct3 = .OPIVV } } },
             
+            .vfaddvv         => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b000000, .funct3 = .OPFVV } } },
+            .vfsubvv         => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b000010, .funct3 = .OPFVV } } },
+            
             .vadcvv         => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b010000, .funct3 = .OPMVV } } },
             .vmvvx          => .{ .opcode = .OP_V, .data = .{ .vecmath = .{ .vm = true, .funct6 = 0b010111, .funct3 = .OPIVX } } },
 
@@ -702,6 +710,8 @@ pub const InstEnc = enum {
 
             .vaddvv,
             .vsubvv,
+            .vfaddvv,
+            .vfsubvv,
             .vadcvv,
             .vmvvx,
             .vslidedownvx,
src/arch/riscv64/Mir.zig
@@ -142,12 +142,11 @@ pub const Inst = struct {
         vsetivli,
         vsetvl,
         vaddvv,
+        vfaddvv,
         vsubvv,
+        vfsubvv,
         vslidedownvx,
 
-        // A Extension Instructions
-        amo,
-
         /// A pseudo-instruction. Used for anything that isn't 1:1 with an
         /// assembly instruction.
         pseudo,
test/behavior/byteswap.zig
@@ -100,7 +100,6 @@ test "@byteSwap vectors u8" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     try comptime vector8();
     try vector8();
test/behavior/cast.zig
@@ -1985,7 +1985,6 @@ test "peer type resolution: vector and array and tuple" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     var vec: @Vector(2, i8) = .{ 10, 20 };
     var arr: [2]i8 = .{ 30, 40 };
test/behavior/globals.zig
@@ -18,7 +18,6 @@ test "store to global vector" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     try expect(vpos[1] == 0.0);
     vpos = @Vector(2, f32){ 0.0, 1.0 };
test/behavior/sizeof_and_typeof.zig
@@ -19,8 +19,6 @@ test "@sizeOf on compile-time types" {
 }
 
 test "@TypeOf() with multiple arguments" {
-    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-
     {
         var var_1: u32 = undefined;
         var var_2: u8 = undefined;
test/behavior/vector.zig
@@ -97,7 +97,6 @@ test "vector int operators" {
 
 test "vector float operators" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -105,21 +104,34 @@ test "vector float operators" {
     if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArmOrThumb()) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
-    inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| {
-        const S = struct {
-            fn doTheTest() !void {
-                var v: @Vector(4, T) = [4]T{ 10, 20, 30, 40 };
-                var x: @Vector(4, T) = [4]T{ 1, 2, 3, 4 };
-                _ = .{ &v, &x };
-                try expect(mem.eql(T, &@as([4]T, v + x), &[4]T{ 11, 22, 33, 44 }));
-                try expect(mem.eql(T, &@as([4]T, v - x), &[4]T{ 9, 18, 27, 36 }));
-                try expect(mem.eql(T, &@as([4]T, v * x), &[4]T{ 10, 40, 90, 160 }));
-                try expect(mem.eql(T, &@as([4]T, -x), &[4]T{ -1, -2, -3, -4 }));
-            }
-        };
-        try S.doTheTest();
-        try comptime S.doTheTest();
-    }
+    const S = struct {
+        fn doTheTest(T: type) !void {
+            var v: @Vector(4, T) = .{ 10, 20, 30, 40 };
+            var x: @Vector(4, T) = .{ 1, 2, 3, 4 };
+            _ = .{ &v, &x };
+            try expectEqual(v + x, .{ 11, 22, 33, 44 });
+            try expectEqual(v - x, .{ 9, 18, 27, 36 });
+            try expectEqual(v * x, .{ 10, 40, 90, 160 });
+            try expectEqual(-x, .{ -1, -2, -3, -4 });
+        }
+    };
+
+    try S.doTheTest(f32);
+    try comptime S.doTheTest(f32);
+
+    try S.doTheTest(f64);
+    try comptime S.doTheTest(f64);
+
+    try S.doTheTest(f16);
+    try comptime S.doTheTest(f16);
+
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+
+    try S.doTheTest(f80);
+    try comptime S.doTheTest(f80);
+
+    try S.doTheTest(f128);
+    try comptime S.doTheTest(f128);
 }
 
 test "vector bit operators" {
@@ -1228,7 +1240,6 @@ test "loading the second vector from a slice of vectors" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     @setRuntimeSafety(false);
     var small_bases = [2]@Vector(2, u8){
@@ -1245,7 +1256,6 @@ test "array of vectors is copied" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     const Vec3 = @Vector(3, i32);
     var points = [_]Vec3{
@@ -1316,7 +1326,6 @@ test "zero multiplicand" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     const zeros = @Vector(2, u32){ 0.0, 0.0 };
     var ones = @Vector(2, u32){ 1.0, 1.0 };
@@ -1411,7 +1420,6 @@ test "store to vector in slice" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     var v = [_]@Vector(3, f32){
         .{ 1, 1, 1 },
@@ -1478,7 +1486,6 @@ test "store vector with memset" {
 test "addition of vectors represented as strings" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     const V = @Vector(3, u8);
     const foo: V = "foo".*;