Commit d03e9d0b83

LemonBoy <thatlemon@gmail.com>
2021-11-01 19:26:32
compiler-rt: Fix f16 API declarations to be consistent
LLVM and compiler-rt must agree on how the parameters are passed. It turns out that something changed in LLVM 13 and broke the test case on AArch64 systems; it has nothing to do with fma at all. Closes #9900
1 parent 77ffffc
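
For context: on AArch64, f16 (`half`) values are passed and returned directly in floating-point registers, while on other targets they travel as 16-bit integers. A compiler-rt routine declared with a `u16` parameter therefore reads the wrong register on AArch64. A minimal sketch of the per-architecture type selection this commit introduces in both files below (2021-era Zig syntax; the helper name is hypothetical):

```zig
const builtin = @import("builtin");

// On AArch64 the f16 argument arrives in an FP register, so the runtime
// routine must declare a real f16 parameter; on every other target it
// arrives as a 16-bit integer.
pub const F16T = if (builtin.cpu.arch.isAARCH64()) f16 else u16;

// The implementation always works on the raw bits; only the declared
// parameter type changes with the target.
fn halfToBits(a: F16T) u16 {
    return @bitCast(u16, a);
}
```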
lib/std/special/compiler_rt/extendXfYf2.zig
@@ -1,6 +1,7 @@
 const std = @import("std");
 const builtin = @import("builtin");
 const is_test = builtin.is_test;
+const native_arch = builtin.cpu.arch;
 
 pub fn __extendsfdf2(a: f32) callconv(.C) f64 {
     return extendXfYf2(f64, f32, @bitCast(u32, a));
@@ -14,12 +15,16 @@ pub fn __extendsftf2(a: f32) callconv(.C) f128 {
     return extendXfYf2(f128, f32, @bitCast(u32, a));
 }
 
-pub fn __extendhfsf2(a: u16) callconv(.C) f32 {
-    return extendXfYf2(f32, f16, a);
+// AArch64 is the only ABI (at the moment) to support f16 arguments without the
+// need for extending them to wider fp types.
+pub const F16T = if (native_arch.isAARCH64()) f16 else u16;
+
+pub fn __extendhfsf2(a: F16T) callconv(.C) f32 {
+    return extendXfYf2(f32, f16, @bitCast(u16, a));
 }
 
-pub fn __extendhftf2(a: u16) callconv(.C) f128 {
-    return extendXfYf2(f128, f16, a);
+pub fn __extendhftf2(a: F16T) callconv(.C) f128 {
+    return extendXfYf2(f128, f16, @bitCast(u16, a));
 }
 
 pub fn __extendxftf2(a: c_longdouble) callconv(.C) f128 {
@@ -29,16 +34,14 @@ pub fn __extendxftf2(a: c_longdouble) callconv(.C) f128 {
 
 pub fn __aeabi_h2f(arg: u16) callconv(.AAPCS) f32 {
     @setRuntimeSafety(false);
-    return @call(.{ .modifier = .always_inline }, __extendhfsf2, .{arg});
+    return @call(.{ .modifier = .always_inline }, extendXfYf2, .{ f32, f16, arg });
 }
 
 pub fn __aeabi_f2d(arg: f32) callconv(.AAPCS) f64 {
     @setRuntimeSafety(false);
-    return @call(.{ .modifier = .always_inline }, __extendsfdf2, .{arg});
+    return @call(.{ .modifier = .always_inline }, extendXfYf2, .{ f64, f32, @bitCast(u32, arg) });
 }
 
-const CHAR_BIT = 8;
-
 inline fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: std.meta.Int(.unsigned, @typeInfo(src_t).Float.bits)) dst_t {
     @setRuntimeSafety(builtin.is_test);
 
@@ -50,7 +53,7 @@ inline fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: std.meta.In
 
     // Various constants whose values follow from the type parameters.
     // Any reasonable optimizer will fold and propagate all of these.
-    const srcBits = @sizeOf(src_t) * CHAR_BIT;
+    const srcBits = @bitSizeOf(src_t);
     const srcExpBits = srcBits - srcSigBits - 1;
     const srcInfExp = (1 << srcExpBits) - 1;
     const srcExpBias = srcInfExp >> 1;
@@ -62,7 +65,7 @@ inline fn extendXfYf2(comptime dst_t: type, comptime src_t: type, a: std.meta.In
     const srcQNaN = 1 << (srcSigBits - 1);
     const srcNaNCode = srcQNaN - 1;
 
-    const dstBits = @sizeOf(dst_t) * CHAR_BIT;
+    const dstBits = @bitSizeOf(dst_t);
     const dstExpBits = dstBits - dstSigBits - 1;
     const dstInfExp = (1 << dstExpBits) - 1;
     const dstExpBias = dstInfExp >> 1;
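
The switch from `@sizeOf(src_t) * CHAR_BIT` to `@bitSizeOf` also drops the ad-hoc constant; for the IEEE float types handled here the two agree, so the change is behavior-preserving, which a comptime check can confirm (a minimal sanity sketch):

```zig
const std = @import("std");

comptime {
    // @bitSizeOf yields the IEEE width directly, no CHAR_BIT needed.
    std.debug.assert(@bitSizeOf(f16) == 16);
    std.debug.assert(@bitSizeOf(f32) == 32);
    std.debug.assert(@bitSizeOf(f64) == 64);
    std.debug.assert(@bitSizeOf(f128) == 128);
}
```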
lib/std/special/compiler_rt/extendXfYf2_test.zig
@@ -3,6 +3,7 @@ const __extendhfsf2 = @import("extendXfYf2.zig").__extendhfsf2;
 const __extendhftf2 = @import("extendXfYf2.zig").__extendhftf2;
 const __extendsftf2 = @import("extendXfYf2.zig").__extendsftf2;
 const __extenddftf2 = @import("extendXfYf2.zig").__extenddftf2;
+const F16T = @import("extendXfYf2.zig").F16T;
 
 fn test__extenddftf2(a: f64, expectedHi: u64, expectedLo: u64) !void {
     const x = __extenddftf2(a);
@@ -27,7 +28,7 @@ fn test__extenddftf2(a: f64, expectedHi: u64, expectedLo: u64) !void {
 }
 
 fn test__extendhfsf2(a: u16, expected: u32) !void {
-    const x = __extendhfsf2(a);
+    const x = __extendhfsf2(@bitCast(F16T, a));
     const rep = @bitCast(u32, x);
 
     if (rep == expected) {
@@ -159,7 +160,7 @@ fn makeInf32() f32 {
 }
 
 fn test__extendhftf2(a: u16, expectedHi: u64, expectedLo: u64) !void {
-    const x = __extendhftf2(a);
+    const x = __extendhftf2(@bitCast(F16T, a));
 
     const rep = @bitCast(u128, x);
     const hi = @intCast(u64, rep >> 64);
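
The test vectors are raw binary16 bit patterns, so they must be reinterpreted as `F16T` at the call boundary. A standalone illustration with a known pattern (values from IEEE 754; the test name is mine):

```zig
const std = @import("std");
const extend = @import("extendXfYf2.zig");

test "extend 1.0 from binary16 to binary32" {
    // 0x3c00 encodes 1.0 in binary16.
    const one_bits: u16 = 0x3c00;
    const widened = extend.__extendhfsf2(@bitCast(extend.F16T, one_bits));
    // 0x3f800000 encodes 1.0 in binary32.
    try std.testing.expectEqual(@as(u32, 0x3f800000), @bitCast(u32, widened));
}
```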
lib/std/special/compiler_rt/truncXfYf2.zig
@@ -1,15 +1,21 @@
 const std = @import("std");
+const builtin = @import("builtin");
+const native_arch = builtin.cpu.arch;
 
-pub fn __truncsfhf2(a: f32) callconv(.C) u16 {
-    return @bitCast(u16, @call(.{ .modifier = .always_inline }, truncXfYf2, .{ f16, f32, a }));
+// AArch64 is the only ABI (at the moment) to support f16 arguments without the
+// need for extending them to wider fp types.
+pub const F16T = if (native_arch.isAARCH64()) f16 else u16;
+
+pub fn __truncsfhf2(a: f32) callconv(.C) F16T {
+    return @bitCast(F16T, @call(.{ .modifier = .always_inline }, truncXfYf2, .{ f16, f32, a }));
 }
 
-pub fn __truncdfhf2(a: f64) callconv(.C) u16 {
-    return @bitCast(u16, @call(.{ .modifier = .always_inline }, truncXfYf2, .{ f16, f64, a }));
+pub fn __truncdfhf2(a: f64) callconv(.C) F16T {
+    return @bitCast(F16T, @call(.{ .modifier = .always_inline }, truncXfYf2, .{ f16, f64, a }));
 }
 
-pub fn __trunctfhf2(a: f128) callconv(.C) u16 {
-    return @bitCast(u16, @call(.{ .modifier = .always_inline }, truncXfYf2, .{ f16, f128, a }));
+pub fn __trunctfhf2(a: f128) callconv(.C) F16T {
+    return @bitCast(F16T, @call(.{ .modifier = .always_inline }, truncXfYf2, .{ f16, f128, a }));
 }
 
 pub fn __trunctfsf2(a: f128) callconv(.C) f32 {
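
Since both files select `F16T` the same way, the trunc and extend entry points compose without target-specific glue; a round-trip sketch under that assumption:

```zig
const trunc = @import("truncXfYf2.zig");
const extend = @import("extendXfYf2.zig");

// Narrow an f32 to binary16 and widen it back. The intermediate value
// travels as F16T: a real f16 on AArch64, a u16 bit pattern elsewhere.
fn roundTrip(x: f32) f32 {
    const h = trunc.__truncsfhf2(x);
    return extend.__extendhfsf2(h);
}
```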
lib/std/special/compiler_rt/truncXfYf2_test.zig
@@ -1,7 +1,7 @@
 const __truncsfhf2 = @import("truncXfYf2.zig").__truncsfhf2;
 
 fn test__truncsfhf2(a: u32, expected: u16) !void {
-    const actual = __truncsfhf2(@bitCast(f32, a));
+    const actual = @bitCast(u16, __truncsfhf2(@bitCast(f32, a)));
 
     if (actual == expected) {
         return;
@@ -82,7 +82,7 @@ fn test__truncdfhf2(a: f64, expected: u16) void {
 }
 
 fn test__truncdfhf2_raw(a: u64, expected: u16) void {
-    const actual = __truncdfhf2(@bitCast(f64, a));
+    const actual = @bitCast(u16, __truncdfhf2(@bitCast(f64, a)));
 
     if (actual == expected) {
         return;
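
The trunc side mirrors the extend side: the routine's return value is bit-cast back to `u16` before comparing against the expected pattern. A standalone example (the test name is mine):

```zig
const std = @import("std");
const trunc = @import("truncXfYf2.zig");

test "truncate 1.0 from binary32 to binary16" {
    const narrowed = trunc.__truncsfhf2(1.0);
    // 0x3c00 encodes 1.0 in binary16, whichever type carries it.
    try std.testing.expectEqual(@as(u16, 0x3c00), @bitCast(u16, narrowed));
}
```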
test/behavior/cast_stage1.zig
@@ -263,6 +263,32 @@ test "cast *[1][*]const u8 to [*]const ?[*]const u8" {
     try expect(mem.eql(u8, std.mem.spanZ(@ptrCast([*:0]const u8, x[0].?)), "window name"));
 }
 
+test "cast f16 to wider types" {
+    const S = struct {
+        fn doTheTest() !void {
+            var x: f16 = 1234.0;
+            try std.testing.expectEqual(@as(f32, 1234.0), x);
+            try std.testing.expectEqual(@as(f64, 1234.0), x);
+            try std.testing.expectEqual(@as(f128, 1234.0), x);
+        }
+    };
+    try S.doTheTest();
+    comptime try S.doTheTest();
+}
+
+test "cast f128 to narrower types" {
+    const S = struct {
+        fn doTheTest() !void {
+            var x: f128 = 1234.0;
+            try std.testing.expectEqual(@as(f16, 1234.0), @floatCast(f16, x));
+            try std.testing.expectEqual(@as(f32, 1234.0), @floatCast(f32, x));
+            try std.testing.expectEqual(@as(f64, 1234.0), @floatCast(f64, x));
+        }
+    };
+    try S.doTheTest();
+    comptime try S.doTheTest();
+}
+
 test "vector casts" {
     const S = struct {
         fn doTheTest() !void {
test/behavior/muladd.zig
@@ -24,8 +24,7 @@ fn testMulAdd() !void {
         var c: f64 = 6.25;
         try expect(@mulAdd(f64, a, b, c) == 20);
     }
-    // TODO https://github.com/ziglang/zig/issues/9900
-    if (@import("builtin").cpu.arch != .aarch64) {
+    {
         var a: f16 = 5.5;
         var b: f128 = 2.5;
         var c: f128 = 6.25;
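
With the declarations fixed, the f16 operand widens correctly on AArch64 as well, so the carve-out can go. A minimal standalone version of the re-enabled check (the f16 value is implicitly widened to f128, which routes through __extendhftf2 at runtime on targets without native f16 arithmetic):

```zig
const std = @import("std");

test "f16 operand widened for an f128 mulAdd" {
    var a: f16 = 5.5;
    var b: f128 = 2.5;
    var c: f128 = 6.25;
    // 5.5 * 2.5 + 6.25 == 20.0, exactly representable at every width.
    try std.testing.expect(@mulAdd(f128, a, b, c) == 20);
}
```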