Commit `67d04a988a`

Veikka Tuominen <git@vexu.eu>

2022-01-19 17:47:51

std: add f80 bits

master

1 parent a31a749

Changed files (5)

doc

langref.html.in

lib

std

math

@@ -737,6 +737,11 @@ pub fn main() void {
           <td><code class="c">double</code></td>
           <td>64-bit floating point (52-bit mantissa) IEEE-754-2008 binary64</td>
         </tr>
+        <tr>
+            <th scope="row">{#syntax#}f80{#endsyntax#}</th>
+          <td><code class="c">long double</code></td>
+          <td>80-bit floating point (64-bit mantissa) IEEE-754-2008 80-bit extended precision</td>
+        </tr>
         <tr>
             <th scope="row">{#syntax#}f128{#endsyntax#}</th>
             <td><code class="c">_Float128</code></td>
@@ -1500,6 +1505,7 @@ fn divide(a: i32, b: i32) i32 {
           <li>{#syntax#}f16{#endsyntax#} - IEEE-754-2008 binary16</li>
           <li>{#syntax#}f32{#endsyntax#} - IEEE-754-2008 binary32</li>
           <li>{#syntax#}f64{#endsyntax#} - IEEE-754-2008 binary64</li>
+          <li>{#syntax#}f80{#endsyntax#} - IEEE-754-2008 80-bit extended precision</li>
           <li>{#syntax#}f128{#endsyntax#} - IEEE-754-2008 binary128</li>
           <li>{#syntax#}c_longdouble{#endsyntax#} - matches <code class="c">long double</code> for the target C ABI</li>
       </ul>

@@ -8,6 +8,7 @@ pub fn epsilon(comptime T: type) T {
         f16 => math.f16_epsilon,
         f32 => math.f32_epsilon,
         f64 => math.f64_epsilon,
+        f80 => math.f80_epsilon,
         f128 => math.f128_epsilon,
         else => @compileError("epsilon not implemented for " ++ @typeName(T)),
     };

@@ -7,6 +7,7 @@ pub fn inf(comptime T: type) T {
         f16 => math.inf_f16,
         f32 => math.inf_f32,
         f64 => math.inf_f64,
+        f80 => math.inf_f80,
         f128 => math.inf_f128,
         else => @compileError("inf not implemented for " ++ @typeName(T)),
     };

@@ -6,6 +6,7 @@ pub fn nan(comptime T: type) T {
         f16 => math.nan_f16,
         f32 => math.nan_f32,
         f64 => math.nan_f64,
+        f80 => math.nan_f80,
         f128 => math.nan_f128,
         else => @compileError("nan not implemented for " ++ @typeName(T)),
     };
@@ -19,6 +20,8 @@ pub fn snan(comptime T: type) T {
         f16 => @bitCast(f16, math.nan_u16),
         f32 => @bitCast(f32, math.nan_u32),
         f64 => @bitCast(f64, math.nan_u64),
+        f80 => @bitCast(f80, math.nan_u80),
+        f128 => @bitCast(f128, math.nan_u128),
         else => @compileError("snan not implemented for " ++ @typeName(T)),
     };
 }

@@ -43,7 +43,21 @@ pub const f128_max = @bitCast(f128, @as(u128, 0x7FFEFFFFFFFFFFFFFFFFFFFFFFFFFFFF
 pub const f128_epsilon = @bitCast(f128, @as(u128, 0x3F8F0000000000000000000000000000));
 pub const f128_toint = 1.0 / f128_epsilon;
 
+const F80Repr = if (@import("builtin").cpu.arch.endian() == .Little) extern struct { // little-endian targets: fraction first, then exp
+    fraction: u64, // 64-bit field; the constants built from this struct set its top bit explicitly
+    exp: u16, // 16-bit field; NOTE(review): presumably sign bit + 15-bit biased exponent — confirm
+} else extern struct { // all other targets: same two fields in the opposite order
+    exp: u16,
+    fraction: u64,
+};
+
 // float.h details
+pub const f80_true_min = @ptrCast(*const f80, &F80Repr{ .fraction = 1, .exp = 0 }).*; // lowest fraction bit only, zero exp field (presumably the smallest positive subnormal)
+pub const f80_min = @ptrCast(*const f80, &F80Repr{ .fraction = 0x8000000000000000, .exp = 1 }).*; // top fraction bit only, exp field = 1 (presumably the smallest positive normal)
+pub const f80_max = @ptrCast(*const f80, &F80Repr{ .fraction = 0xFFFFFFFFFFFFFFFF, .exp = 0x7FFE }).*; // all fraction bits set, exp field one below all-ones (0x7FFF is used by inf/nan)
+pub const f80_epsilon = @ptrCast(*const f80, &F80Repr{ .fraction = 0x8000000000000000, .exp = 0x3FC0 }).*; // top fraction bit only; 2^-63 assuming the usual 0x3FFF exponent bias — TODO confirm
+pub const f80_toint = 1.0 / f80_epsilon; // reciprocal of f80_epsilon, mirroring f128_toint
+
 pub const f64_true_min = 4.94065645841246544177e-324;
 pub const f64_min = 2.2250738585072014e-308;
 pub const f64_max = 1.79769313486231570815e+308;
@@ -91,6 +105,10 @@ pub const qnan_f64 = @bitCast(f64, qnan_u64);
 pub const inf_u64 = @as(u64, 0x7FF << 52);
 pub const inf_f64 = @bitCast(f64, inf_u64);
 
+pub const inf_f80 = @ptrCast(*const f80, &F80Repr{ .fraction = 0x8000000000000000, .exp = 0x7fff }).*; // exp field 0x7fff, only the top fraction bit (bit 63) set
+pub const nan_f80 = @ptrCast(*const f80, &F80Repr{ .fraction = 0xA000000000000000, .exp = 0x7fff }).*; // exp field 0x7fff, fraction bits 63 and 61 set (bit 62 clear)
+pub const qnan_f80 = @ptrCast(*const f80, &F80Repr{ .fraction = 0xC000000000000000, .exp = 0x7fff }).*; // exp field 0x7fff, fraction bits 63 and 62 set (presumably bit 62 is the quiet bit)
+
 pub const nan_u128 = @as(u128, 0x7fff0000000000000000000000000001);
 pub const nan_f128 = @bitCast(f128, nan_u128);

Commit 67d04a988a

Commit `67d04a988a`