Commit 353419f82d

Marc Tiehuis <marctiehuis@gmail.com>
2018-08-23 11:42:09
Default to strict IEEE floating point
Closes #1227.
1 parent 4b68ef4
doc/langref.html.in
@@ -744,19 +744,19 @@ const yet_another_hex_float = 0x103.70P-5;
       {#code_end#}
       {#header_close#}
       {#header_open|Floating Point Operations#}
-      <p>By default floating point operations use <code>Optimized</code> mode,
-      but you can switch to <code>Strict</code> mode on a per-block basis:</p>
+      <p>By default floating point operations use <code>Strict</code> mode,
+      but you can switch to <code>Optimized</code> mode on a per-block basis:</p>
       {#code_begin|obj|foo#}
       {#code_release_fast#}
 const builtin = @import("builtin");
 const big = f64(1 << 40);
 
 export fn foo_strict(x: f64) f64 {
-    @setFloatMode(this, builtin.FloatMode.Strict);
     return x + big - big;
 }
 
 export fn foo_optimized(x: f64) f64 {
+    @setFloatMode(this, builtin.FloatMode.Optimized);
     return x + big - big;
 }
       {#code_end#}
@@ -5948,7 +5948,7 @@ pub const FloatMode = enum {
       {#code_end#}
       <ul>
         <li>
-          <code>Optimized</code> (default) - Floating point operations may do all of the following:
+          <code>Optimized</code> - Floating point operations may do all of the following:
           <ul>
             <li>Assume the arguments and result are not NaN. Optimizations are required to retain defined behavior over NaNs, but the value of the result is undefined.</li>
             <li>Assume the arguments and result are not +/-Inf. Optimizations are required to retain defined behavior over +/-Inf, but the value of the result is undefined.</li>
@@ -5960,7 +5960,7 @@ pub const FloatMode = enum {
           This is equivalent to <code>-ffast-math</code> in GCC.
         </li>
         <li>
-          <code>Strict</code> - Floating point operations follow strict IEEE compliance.
+          <code>Strict</code> (default) - Floating point operations follow strict IEEE compliance.
         </li>
       </ul>
       {#see_also|Floating Point Operations#}
src/all_types.hpp
@@ -1852,7 +1852,7 @@ struct ScopeDecls {
     HashMap<Buf *, Tld *, buf_hash, buf_eql_buf> decl_table;
     bool safety_off;
     AstNode *safety_set_node;
-    bool fast_math_off;
+    bool fast_math_on;
     AstNode *fast_math_set_node;
     ImportTableEntry *import;
     // If this is a scope from a container, this is the type entry, otherwise null
@@ -1872,7 +1872,7 @@ struct ScopeBlock {
 
     bool safety_off;
     AstNode *safety_set_node;
-    bool fast_math_off;
+    bool fast_math_on;
     AstNode *fast_math_set_node;
 };
 
src/codegen.cpp
@@ -829,15 +829,15 @@ static bool ir_want_fast_math(CodeGen *g, IrInstruction *instruction) {
         if (scope->id == ScopeIdBlock) {
             ScopeBlock *block_scope = (ScopeBlock *)scope;
             if (block_scope->fast_math_set_node)
-                return !block_scope->fast_math_off;
+                return block_scope->fast_math_on;
         } else if (scope->id == ScopeIdDecls) {
             ScopeDecls *decls_scope = (ScopeDecls *)scope;
             if (decls_scope->fast_math_set_node)
-                return !decls_scope->fast_math_off;
+                return decls_scope->fast_math_on;
         }
         scope = scope->parent;
     }
-    return true;
+    return false;
 }
 
 static bool ir_want_runtime_safety(CodeGen *g, IrInstruction *instruction) {
src/ir.cpp
@@ -15200,17 +15200,17 @@ static TypeTableEntry *ir_analyze_instruction_set_float_mode(IrAnalyze *ira,
         return ira->codegen->builtin_types.entry_void;
     }
 
-    bool *fast_math_off_ptr;
+    bool *fast_math_on_ptr;
     AstNode **fast_math_set_node_ptr;
     if (target_type->id == TypeTableEntryIdBlock) {
         ScopeBlock *block_scope = (ScopeBlock *)target_val->data.x_block;
-        fast_math_off_ptr = &block_scope->fast_math_off;
+        fast_math_on_ptr = &block_scope->fast_math_on;
         fast_math_set_node_ptr = &block_scope->fast_math_set_node;
     } else if (target_type->id == TypeTableEntryIdFn) {
         assert(target_val->data.x_ptr.special == ConstPtrSpecialFunction);
         FnTableEntry *target_fn = target_val->data.x_ptr.data.fn.fn_entry;
         assert(target_fn->def_scope);
-        fast_math_off_ptr = &target_fn->def_scope->fast_math_off;
+        fast_math_on_ptr = &target_fn->def_scope->fast_math_on;
         fast_math_set_node_ptr = &target_fn->def_scope->fast_math_set_node;
     } else if (target_type->id == TypeTableEntryIdMetaType) {
         ScopeDecls *decls_scope;
@@ -15226,7 +15226,7 @@ static TypeTableEntry *ir_analyze_instruction_set_float_mode(IrAnalyze *ira,
                 buf_sprintf("expected scope reference, found type '%s'", buf_ptr(&type_arg->name)));
             return ira->codegen->builtin_types.entry_invalid;
         }
-        fast_math_off_ptr = &decls_scope->fast_math_off;
+        fast_math_on_ptr = &decls_scope->fast_math_on;
         fast_math_set_node_ptr = &decls_scope->fast_math_set_node;
     } else {
         ir_add_error_node(ira, target_instruction->source_node,
@@ -15248,7 +15248,7 @@ static TypeTableEntry *ir_analyze_instruction_set_float_mode(IrAnalyze *ira,
         return ira->codegen->builtin_types.entry_invalid;
     }
     *fast_math_set_node_ptr = source_node;
-    *fast_math_off_ptr = (float_mode_scalar == FloatModeStrict);
+    *fast_math_on_ptr = (float_mode_scalar == FloatModeOptimized);
 
     ir_build_const_from(ira, &instruction->base);
     return ira->codegen->builtin_types.entry_void;
std/fmt/errol/index.zig
@@ -253,11 +253,7 @@ fn gethi(in: f64) f64 {
 /// Normalize the number by factoring in the error.
 ///   @hp: The float pair.
 fn hpNormalize(hp: *HP) void {
-    // Required to avoid segfaults causing buffer overrun during errol3 digit output termination.
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
-
     const val = hp.val;
-
     hp.val += hp.off;
     hp.off += val - hp.val;
 }
std/math/complex/exp.zig
@@ -17,8 +17,6 @@ pub fn exp(z: var) @typeOf(z) {
 }
 
 fn exp32(z: Complex(f32)) Complex(f32) {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
-
     const exp_overflow = 0x42b17218; // max_exp * ln2 ~= 88.72283955
     const cexp_overflow = 0x43400074; // (max_exp - min_denom_exp) * ln2
 
std/math/ceil.zig
@@ -61,10 +61,8 @@ fn ceil64(x: f64) f64 {
     }
 
     if (u >> 63 != 0) {
-        @setFloatMode(this, builtin.FloatMode.Strict);
         y = x - math.f64_toint + math.f64_toint - x;
     } else {
-        @setFloatMode(this, builtin.FloatMode.Strict);
         y = x + math.f64_toint - math.f64_toint - x;
     }
 
std/math/cos.zig
@@ -37,8 +37,6 @@ const C5 = 4.16666666666665929218E-2;
 //
 // This may have slight differences on some edge cases and may need to replaced if so.
 fn cos32(x_: f32) f32 {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
-
     const pi4a = 7.85398125648498535156e-1;
     const pi4b = 3.77489470793079817668E-8;
     const pi4c = 2.69515142907905952645E-15;
std/math/exp.zig
@@ -18,8 +18,6 @@ pub fn exp(x: var) @typeOf(x) {
 }
 
 fn exp32(x_: f32) f32 {
-    @setFloatMode(this, builtin.FloatMode.Strict);
-
     const half = []f32{ 0.5, -0.5 };
     const ln2hi = 6.9314575195e-1;
     const ln2lo = 1.4286067653e-6;
@@ -95,8 +93,6 @@ fn exp32(x_: f32) f32 {
 }
 
 fn exp64(x_: f64) f64 {
-    @setFloatMode(this, builtin.FloatMode.Strict);
-
     const half = []const f64{ 0.5, -0.5 };
     const ln2hi: f64 = 6.93147180369123816490e-01;
     const ln2lo: f64 = 1.90821492927058770002e-10;
std/math/exp2.zig
@@ -36,8 +36,6 @@ const exp2ft = []const f64{
 };
 
 fn exp2_32(x: f32) f32 {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
-
     const tblsiz = @intCast(u32, exp2ft.len);
     const redux: f32 = 0x1.8p23 / @intToFloat(f32, tblsiz);
     const P1: f32 = 0x1.62e430p-1;
@@ -353,8 +351,6 @@ const exp2dt = []f64{
 };
 
 fn exp2_64(x: f64) f64 {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
-
     const tblsiz = @intCast(u32, exp2dt.len / 2);
     const redux: f64 = 0x1.8p52 / @intToFloat(f64, tblsiz);
     const P1: f64 = 0x1.62e42fefa39efp-1;
std/math/expm1.zig
@@ -19,8 +19,6 @@ pub fn expm1(x: var) @typeOf(x) {
 }
 
 fn expm1_32(x_: f32) f32 {
-    @setFloatMode(this, builtin.FloatMode.Strict);
-
     if (math.isNan(x_))
         return math.nan(f32);
 
@@ -149,8 +147,6 @@ fn expm1_32(x_: f32) f32 {
 }
 
 fn expm1_64(x_: f64) f64 {
-    @setFloatMode(this, builtin.FloatMode.Strict);
-
     if (math.isNan(x_))
         return math.nan(f64);
 
std/math/floor.zig
@@ -97,10 +97,8 @@ fn floor64(x: f64) f64 {
     }
 
     if (u >> 63 != 0) {
-        @setFloatMode(this, builtin.FloatMode.Strict);
         y = x - math.f64_toint + math.f64_toint - x;
     } else {
-        @setFloatMode(this, builtin.FloatMode.Strict);
         y = x + math.f64_toint - math.f64_toint - x;
     }
 
std/math/ln.zig
@@ -35,8 +35,6 @@ pub fn ln(x: var) @typeOf(x) {
 }
 
 pub fn ln_32(x_: f32) f32 {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
-
     const ln2_hi: f32 = 6.9313812256e-01;
     const ln2_lo: f32 = 9.0580006145e-06;
     const Lg1: f32 = 0xaaaaaa.0p-24;
@@ -89,8 +87,6 @@ pub fn ln_32(x_: f32) f32 {
 }
 
 pub fn ln_64(x_: f64) f64 {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
-
     const ln2_hi: f64 = 6.93147180369123816490e-01;
     const ln2_lo: f64 = 1.90821492927058770002e-10;
     const Lg1: f64 = 6.666666666666735130e-01;
std/math/pow.zig
@@ -28,8 +28,6 @@ const assert = std.debug.assert;
 
 // This implementation is taken from the go stlib, musl is a bit more complex.
 pub fn pow(comptime T: type, x: T, y: T) T {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
-
     if (T != f32 and T != f64) {
         @compileError("pow not implemented for " ++ @typeName(T));
     }
std/math/round.zig
@@ -35,11 +35,7 @@ fn round32(x_: f32) f32 {
         return 0 * @bitCast(f32, u);
     }
 
-    {
-        @setFloatMode(this, builtin.FloatMode.Strict);
-        y = x + math.f32_toint - math.f32_toint - x;
-    }
-
+    y = x + math.f32_toint - math.f32_toint - x;
     if (y > 0.5) {
         y = y + x - 1;
     } else if (y <= -0.5) {
@@ -72,11 +68,7 @@ fn round64(x_: f64) f64 {
         return 0 * @bitCast(f64, u);
     }
 
-    {
-        @setFloatMode(this, builtin.FloatMode.Strict);
-        y = x + math.f64_toint - math.f64_toint - x;
-    }
-
+    y = x + math.f64_toint - math.f64_toint - x;
     if (y > 0.5) {
         y = y + x - 1;
     } else if (y <= -0.5) {
std/math/sin.zig
@@ -38,8 +38,6 @@ const C5 = 4.16666666666665929218E-2;
 //
 // This may have slight differences on some edge cases and may need to replaced if so.
 fn sin32(x_: f32) f32 {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
-
     const pi4a = 7.85398125648498535156e-1;
     const pi4b = 3.77489470793079817668E-8;
     const pi4c = 2.69515142907905952645E-15;
std/math/sinh.zig
@@ -54,8 +54,6 @@ fn sinh32(x: f32) f32 {
 }
 
 fn sinh64(x: f64) f64 {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
-
     const u = @bitCast(u64, x);
     const w = @intCast(u32, u >> 32);
     const ax = @bitCast(f64, u & (@maxValue(u64) >> 1));
std/math/tan.zig
@@ -31,8 +31,6 @@ const Tq4 = -5.38695755929454629881E7;
 //
 // This may have slight differences on some edge cases and may need to replaced if so.
 fn tan32(x_: f32) f32 {
-    @setFloatMode(this, @import("builtin").FloatMode.Strict);
-
     const pi4a = 7.85398125648498535156e-1;
     const pi4b = 3.77489470793079817668E-8;
     const pi4c = 2.69515142907905952645E-15;