Commit 7fa97b752e

Andrew Kelley <superjoe30@gmail.com>
2018-05-26 20:57:53
add strict float mode to some math functions
fixes a test failure for acosh32
1 parent 8efb3f5
Changed files (4)
std/math/acosh.zig
@@ -19,6 +19,8 @@ pub fn acosh(x: var) @typeOf(x) {
 
 // acosh(x) = log(x + sqrt(x * x - 1))
 fn acosh32(x: f32) f32 {
+    @setFloatMode(this, builtin.FloatMode.Strict);
+
     const u = @bitCast(u32, x);
     const i = u & 0x7FFFFFFF;
 
@@ -37,6 +39,8 @@ fn acosh32(x: f32) f32 {
 }
 
 fn acosh64(x: f64) f64 {
+    @setFloatMode(this, builtin.FloatMode.Strict);
+
     const u = @bitCast(u64, x);
     const e = (u >> 52) & 0x7FF;
 
std/math/isnan.zig
@@ -19,8 +19,8 @@ pub fn isNan(x: var) bool {
     }
 }
 
-// Note: A signalling nan is identical to a standard right now by may have a different bit
-// representation in the future when required.
+/// Note: A signalling nan is identical to a standard nan right now but may have a different bit
+/// representation in the future when required.
 pub fn isSignalNan(x: var) bool {
     return isNan(x);
 }
std/math/log1p.zig
@@ -6,6 +6,7 @@
 // - log1p(x)     = nan if x < -1
 // - log1p(nan)   = nan
 
+const builtin = @import("builtin");
 const std = @import("../index.zig");
 const math = std.math;
 const assert = std.debug.assert;
@@ -20,6 +21,8 @@ pub fn log1p(x: var) @typeOf(x) {
 }
 
 fn log1p_32(x: f32) f32 {
+    @setFloatMode(this, builtin.FloatMode.Strict);
+
     const ln2_hi = 6.9313812256e-01;
     const ln2_lo = 9.0580006145e-06;
     const Lg1: f32 = 0xaaaaaa.0p-24;
@@ -96,6 +99,8 @@ fn log1p_32(x: f32) f32 {
 }
 
 fn log1p_64(x: f64) f64 {
+    @setFloatMode(this, builtin.FloatMode.Strict);
+
     const ln2_hi: f64 = 6.93147180369123816490e-01;
     const ln2_lo: f64 = 1.90821492927058770002e-10;
     const Lg1: f64 = 6.666666666666735130e-01;
std/special/builtin.zig
@@ -201,6 +201,8 @@ fn isNan(comptime T: type, bits: T) bool {
 // behaviour. Most intermediate i32 values are changed to u32 where appropriate but there are
 // potentially some edge cases remaining that are not handled in the same way.
 export fn sqrt(x: f64) f64 {
+    @setFloatMode(this, builtin.FloatMode.Strict);
+
     const tiny: f64 = 1.0e-300;
     const sign: u32 = 0x80000000;
     const u = @bitCast(u64, x);