Commit 6a736f0c8c

Veikka Tuominen <git@vexu.eu>
2022-01-21 20:49:02
compiler-rt: add add/sub for f80
1 parent 9bbd3ab
Changed files (2)
lib
std
special
lib/std/special/compiler_rt/addXf3.zig
@@ -225,6 +225,177 @@ fn addXf3(comptime T: type, a: T, b: T) T {
     return @bitCast(T, result);
 }
 
+fn normalize_f80(exp: *i32, significand: *u80) void {
+    const shift = @clz(u64, @truncate(u64, significand.*));
+    significand.* = (significand.* << shift);
+    exp.* += -@as(i8, shift);
+}
+
+pub fn __addxf3(a: f80, b: f80) callconv(.C) f80 {
+    var a_rep align(16) = @ptrCast(*const std.math.F80Repr, &a).*;
+    var b_rep align(16) = @ptrCast(*const std.math.F80Repr, &b).*;
+    var a_exp: i32 = a_rep.exp & 0x7FFF;
+    var b_exp: i32 = b_rep.exp & 0x7FFF;
+
+    const significand_bits = std.math.floatMantissaBits(f80);
+    const int_bit = 0x8000000000000000;
+    const significand_mask = 0x7FFFFFFFFFFFFFFF;
+    const qnan_bit = 0xC000000000000000;
+    const max_exp = 0x7FFF;
+    const sign_bit = 0x8000;
+
+    // Detect if a or b is infinity, or NaN.
+    if (a_exp == max_exp) {
+        if (a_rep.fraction ^ int_bit == 0) {
+            if (b_exp == max_exp and (b_rep.fraction ^ int_bit == 0)) {
+                // +/-infinity + -/+infinity = qNaN
+                return std.math.qnan_f80;
+            }
+            // +/-infinity + anything = +/- infinity
+            return a;
+        } else {
+            std.debug.assert(a_rep.fraction & significand_mask != 0);
+            // NaN + anything = qNaN
+            a_rep.fraction |= qnan_bit;
+            return @ptrCast(*const f80, &a_rep).*;
+        }
+    }
+    if (b_exp == max_exp) {
+        if (b_rep.fraction ^ int_bit == 0) {
+            // anything + +/-infinity = +/-infinity
+            return b;
+        } else {
+            std.debug.assert(b_rep.fraction & significand_mask != 0);
+            // anything + NaN = qNaN
+            b_rep.fraction |= qnan_bit;
+            return @ptrCast(*const f80, &b_rep).*;
+        }
+    }
+
+    const a_zero = (a_rep.fraction | @bitCast(u32, a_exp)) == 0;
+    const b_zero = (b_rep.fraction | @bitCast(u32, b_exp)) == 0;
+    if (a_zero) {
+        // zero + anything = anything
+        if (b_zero) {
+            // but we need to get the sign right for zero + zero
+            a_rep.exp &= b_rep.exp;
+            return @ptrCast(*const f80, &a_rep).*;
+        } else {
+            return b;
+        }
+    } else if (b_zero) {
+        // anything + zero = anything
+        return a;
+    }
+
+    var a_int: u80 = a_rep.fraction | (@as(u80, a_rep.exp & max_exp) << significand_bits);
+    var b_int: u80 = b_rep.fraction | (@as(u80, b_rep.exp & max_exp) << significand_bits);
+
+    // Swap a and b if necessary so that a has the larger absolute value.
+    if (b_int > a_int) {
+        const temp = a_rep;
+        a_rep = b_rep;
+        b_rep = temp;
+    }
+
+    // Extract the exponent and significand from the (possibly swapped) a and b.
+    a_exp = a_rep.exp & max_exp;
+    b_exp = b_rep.exp & max_exp;
+    a_int = a_rep.fraction;
+    b_int = b_rep.fraction;
+
+    // Normalize any denormals, and adjust the exponent accordingly.
+    normalize_f80(&a_exp, &a_int);
+    normalize_f80(&b_exp, &b_int);
+
+    // The sign of the result is the sign of the larger operand, a.  If they
+    // have opposite signs, we are performing a subtraction; otherwise addition.
+    const result_sign = a_rep.exp & sign_bit;
+    const subtraction = (a_rep.exp ^ b_rep.exp) & sign_bit != 0;
+
+    // Shift the significands to give us round, guard and sticky, and or in the
+    // implicit significand bit.  (If we fell through from the denormal path it
+    // was already set by normalize( ), but setting it twice won't hurt
+    // anything.)
+    a_int = a_int << 3;
+    b_int = b_int << 3;
+
+    // Shift the significand of b by the difference in exponents, with a sticky
+    // bottom bit to get rounding correct.
+    const @"align" = @intCast(u80, a_exp - b_exp);
+    if (@"align" != 0) {
+        if (@"align" < 80) {
+            const sticky = if (b_int << @intCast(u7, 80 - @"align") != 0) @as(u80, 1) else 0;
+            b_int = (b_int >> @truncate(u7, @"align")) | sticky;
+        } else {
+            b_int = 1; // sticky; b is known to be non-zero.
+        }
+    }
+    if (subtraction) {
+        a_int -= b_int;
+        // If a == -b, return +zero.
+        if (a_int == 0) return 0.0;
+
+        // If partial cancellation occurred, we need to left-shift the result
+        // and adjust the exponent:
+        if (a_int < int_bit << 3) {
+            const shift = @intCast(i32, @clz(u80, a_int)) - @intCast(i32, @clz(u80, int_bit << 3));
+            a_int <<= @intCast(u7, shift);
+            a_exp -= shift;
+        }
+    } else { // addition
+        a_int += b_int;
+
+        // If the addition carried up, we need to right-shift the result and
+        // adjust the exponent:
+        if (a_int & (int_bit << 4) != 0) {
+            const sticky = a_int & 1;
+            a_int = a_int >> 1 | sticky;
+            a_exp += 1;
+        }
+    }
+
+    // If we have overflowed the type, return +/- infinity:
+    if (a_exp >= max_exp) {
+        a_rep.exp = max_exp | result_sign;
+        a_rep.fraction = int_bit; // integer bit is set for +/-inf
+        return @ptrCast(*const f80, &a_rep).*;
+    }
+
+    if (a_exp <= 0) {
+        // Result is denormal before rounding; the exponent is zero and we
+        // need to shift the significand.
+        const shift = @intCast(u80, 1 - a_exp);
+        const sticky = if (a_int << @intCast(u7, 80 - shift) != 0) @as(u1, 1) else 0;
+        a_int = a_int >> @intCast(u7, shift | sticky);
+        a_exp = 0;
+    }
+
+    // Low three bits are round, guard, and sticky.
+    const round_guard_sticky = @truncate(u3, a_int);
+
+    // Shift the significand into place.
+    a_int = @truncate(u64, a_int >> 3);
+
+    // // Insert the exponent and sign.
+    a_int |= (@intCast(u80, a_exp) | result_sign) << significand_bits;
+
+    // Final rounding.  The result may overflow to infinity, but that is the
+    // correct result in that case.
+    if (round_guard_sticky > 0x4) a_int += 1;
+    if (round_guard_sticky == 0x4) a_int += a_int & 1;
+
+    a_rep.fraction = @truncate(u64, a_int);
+    a_rep.exp = @truncate(u16, a_int >> significand_bits);
+    return @ptrCast(*const f80, &a_rep).*;
+}
+
+pub fn __subxf3(a: f80, b: f80) callconv(.C) f80 {
+    var b_rep align(16) = @ptrCast(*const std.math.F80Repr, &b).*;
+    b_rep.exp ^= 0x8000;
+    return __addxf3(a, @ptrCast(*const f80, &b_rep).*);
+}
+
 test {
     _ = @import("addXf3_test.zig");
 }
lib/std/special/compiler_rt.zig
@@ -237,12 +237,16 @@ comptime {
     @export(__adddf3, .{ .name = "__adddf3", .linkage = linkage });
     const __addtf3 = @import("compiler_rt/addXf3.zig").__addtf3;
     @export(__addtf3, .{ .name = "__addtf3", .linkage = linkage });
+    const __addxf3 = @import("compiler_rt/addXf3.zig").__addxf3;
+    @export(__addxf3, .{ .name = "__addxf3", .linkage = linkage });
     const __subsf3 = @import("compiler_rt/addXf3.zig").__subsf3;
     @export(__subsf3, .{ .name = "__subsf3", .linkage = linkage });
     const __subdf3 = @import("compiler_rt/addXf3.zig").__subdf3;
     @export(__subdf3, .{ .name = "__subdf3", .linkage = linkage });
     const __subtf3 = @import("compiler_rt/addXf3.zig").__subtf3;
     @export(__subtf3, .{ .name = "__subtf3", .linkage = linkage });
+    const __subxf3 = @import("compiler_rt/addXf3.zig").__subxf3;
+    @export(__subxf3, .{ .name = "__subxf3", .linkage = linkage });
 
     const __mulsf3 = @import("compiler_rt/mulXf3.zig").__mulsf3;
     @export(__mulsf3, .{ .name = "__mulsf3", .linkage = linkage });