Commit `319555a669`

Cody Tapscott <topolarity@tapscott.me>

2022-04-12 21:23:18

Add `floatFractionalBits` to replace `floatMantissaDigits`

master

1 parent 319b5cb

Changed files (5)

lib

std

math

float.zig

isnormal.zig

special

compiler_rt

fixXfYi.zig

floatXiYf.zig

math.zig

@@ -4,7 +4,7 @@ const expect = std.testing.expect;
 
 /// Creates a raw "1.0" mantissa for floating point type T. Used to dedupe f80 logic.
 fn mantissaOne(comptime T: type) comptime_int {
-    return if (floatMantissaDigits(T) == 64) 1 << 63 else 0;
+    return if (T == f80) 1 << floatFractionalBits(T) else 0;
 }
 
 /// Creates floating point type T from an unbiased exponent and raw mantissa.
@@ -42,19 +42,19 @@ pub fn floatMantissaBits(comptime T: type) comptime_int {
     };
 }
 
-/// Returns the number of binary digits in the mantissa of floating point type T.
-pub fn floatMantissaDigits(comptime T: type) comptime_int {
+/// Returns the number of fractional bits in the mantissa of floating point type T.
+pub fn floatFractionalBits(comptime T: type) comptime_int {
     assert(@typeInfo(T) == .Float);
 
     // standard IEEE floats have an implicit 0.m or 1.m integer part
     // f80 is special and has an explicitly stored bit in the MSB
-    // this function corresponds to `MANT_DIG' constants from C
+    // this function corresponds to `MANT_DIG - 1` from C (e.g. `FLT_MANT_DIG - 1` for f32)
     return switch (@typeInfo(T).Float.bits) {
-        16 => 11,
-        32 => 24,
-        64 => 53,
-        80 => 64,
-        128 => 113,
+        16 => 10,
+        32 => 23,
+        64 => 52,
+        80 => 63,
+        128 => 112,
         else => @compileError("unknown floating point type " ++ @typeName(T)),
     };
 }
@@ -89,7 +89,7 @@ pub fn floatMax(comptime T: type) T {
 
 /// Returns the machine epsilon of floating point type T.
 pub fn floatEps(comptime T: type) T {
-    return reconstructFloat(T, -(floatMantissaDigits(T) - 1), mantissaOne(T));
+    return reconstructFloat(T, -floatFractionalBits(T), mantissaOne(T));
 }
 
 /// Returns the value inf for floating point type T.
@@ -104,7 +104,7 @@ test "std.math.float" {
         try expect(@bitSizeOf(T) == size);
 
         // for machine epsilon, assert expmin <= -prec <= expmax
-        try expect(floatExponentMin(T) <= -(floatMantissaDigits(T) - 1));
-        try expect(-(floatMantissaDigits(T) - 1) <= floatExponentMax(T));
+        try expect(floatExponentMin(T) <= -floatFractionalBits(T));
+        try expect(-floatFractionalBits(T) <= floatExponentMax(T));
     }
 }

@@ -41,7 +41,7 @@ test "math.isNormal" {
         try expect(!isNormal(@as(T, math.floatTrueMin(T))));
 
         // largest subnormal
-        try expect(!isNormal(@bitCast(T, ~(~@as(TBits, 0) << math.floatMantissaDigits(T) - 1))));
+        try expect(!isNormal(@bitCast(T, ~(~@as(TBits, 0) << math.floatFractionalBits(T)))));
 
         // non-finite numbers
         try expect(!isNormal(-math.inf(T)));

@@ -12,7 +12,7 @@ pub inline fn fixXfYi(comptime I: type, a: anytype) I {
     const rep_t = std.meta.Int(.unsigned, float_bits);
     const sig_bits = math.floatMantissaBits(F);
     const exp_bits = math.floatExponentBits(F);
-    const fractional_sig_bits = math.floatMantissaDigits(F) - 1;
+    const fractional_bits = math.floatFractionalBits(F);
 
     const implicit_bit = if (F != f80) (@as(rep_t, 1) << sig_bits) else 0;
     const max_exp = (1 << (exp_bits - 1));
@@ -42,10 +42,10 @@ pub inline fn fixXfYi(comptime I: type, a: anytype) I {
     // If 0 <= exponent < sig_bits, right shift to get the result.
     // Otherwise, shift left.
     var result: I = undefined;
-    if (exponent < fractional_sig_bits) {
-        result = @intCast(I, significand >> @intCast(Log2Int(rep_t), fractional_sig_bits - exponent));
+    if (exponent < fractional_bits) {
+        result = @intCast(I, significand >> @intCast(Log2Int(rep_t), fractional_bits - exponent));
     } else {
-        result = @intCast(I, significand) << @intCast(Log2Int(I), exponent - fractional_sig_bits);
+        result = @intCast(I, significand) << @intCast(Log2Int(I), exponent - fractional_bits);
     }
 
     if ((@typeInfo(I).Int.signedness == .signed) and negative)

@@ -17,9 +17,9 @@ pub fn floatXiYf(comptime T: type, x: anytype) T {
     const float_bits = @bitSizeOf(T);
     const int_bits = @bitSizeOf(@TypeOf(x));
     const exp_bits = math.floatExponentBits(T);
-    const sig_bits = math.floatMantissaDigits(T) - 1; // Only counts the fractional bits
+    const fractional_bits = math.floatFractionalBits(T);
     const exp_bias = math.maxInt(std.meta.Int(.unsigned, exp_bits - 1));
-    const implicit_bit = if (T != f80) @as(uT, 1) << sig_bits else 0;
+    const implicit_bit = if (T != f80) @as(uT, 1) << fractional_bits else 0;
     const max_exp = exp_bias;
 
     // Sign
@@ -29,14 +29,14 @@ pub fn floatXiYf(comptime T: type, x: anytype) T {
 
     // Compute significand
     var exp = int_bits - @clz(Z, abs_val) - 1;
-    if (int_bits <= sig_bits or exp <= sig_bits) {
-        const shift_amt = sig_bits - @intCast(math.Log2Int(uT), exp);
+    if (int_bits <= fractional_bits or exp <= fractional_bits) {
+        const shift_amt = fractional_bits - @intCast(math.Log2Int(uT), exp);
 
         // Shift up result to line up with the significand - no rounding required
         result = (@intCast(uT, abs_val) << shift_amt);
         result ^= implicit_bit; // Remove implicit integer bit
     } else {
-        var shift_amt = @intCast(math.Log2Int(Z), exp - sig_bits);
+        var shift_amt = @intCast(math.Log2Int(Z), exp - fractional_bits);
         const exact_tie: bool = @ctz(Z, abs_val) == shift_amt - 1;
 
         // Shift down result and remove implicit integer bit
@@ -53,7 +53,7 @@ pub fn floatXiYf(comptime T: type, x: anytype) T {
     result += (@as(uT, exp) + exp_bias) << math.floatMantissaBits(T);
 
     // If the result included a carry, we need to restore the explicit integer bit
-    if (T == f80) result |= 1 << sig_bits;
+    if (T == f80) result |= 1 << fractional_bits;
 
     return @bitCast(T, sign_bit | result);
 }

@@ -38,7 +38,7 @@ pub const sqrt1_2 = 0.707106781186547524400844362104849039;
 
 pub const floatExponentBits = @import("math/float.zig").floatExponentBits;
 pub const floatMantissaBits = @import("math/float.zig").floatMantissaBits;
-pub const floatMantissaDigits = @import("math/float.zig").floatMantissaDigits;
+pub const floatFractionalBits = @import("math/float.zig").floatFractionalBits;
 pub const floatExponentMin = @import("math/float.zig").floatExponentMin;
 pub const floatExponentMax = @import("math/float.zig").floatExponentMax;
 pub const floatTrueMin = @import("math/float.zig").floatTrueMin;

Commit 319555a669

Commit `319555a669`