Commit 96d86e3465

Cody Tapscott <topolarity@tapscott.me>
2022-04-26 01:42:13
compiler_rt: Fix rounding edge case for __mulxf3
1 parent f554077
Changed files (5)
lib/std/special/compiler_rt/addXf3.zig
@@ -74,7 +74,7 @@ fn normalize(comptime T: type, significand: *std.meta.Int(.unsigned, @typeInfo(T
 
     const shift = @clz(std.meta.Int(.unsigned, bits), significand.*) - @clz(Z, integerBit);
     significand.* <<= @intCast(S, shift);
-    return 1 - shift;
+    return @as(i32, 1) - shift;
 }
 
 // TODO: restore inline keyword, see: https://github.com/ziglang/zig/issues/2154
@@ -210,12 +210,10 @@ fn addXf3(comptime T: type, a: T, b: T) T {
     if (aExponent >= maxExponent) return @bitCast(T, infRep | resultSign);
 
     if (aExponent <= 0) {
-        // Result is denormal before rounding; the exponent is zero and we
-        // need to shift the significand.
-        const shift = @intCast(Z, 1 - aExponent);
-        const sticky = if (aSignificand << @intCast(S, typeWidth - shift) != 0) @as(Z, 1) else 0;
-        aSignificand = aSignificand >> @intCast(S, shift | sticky);
-        aExponent = 0;
+        // Result is denormal; the exponent and round/sticky bits are zero.
+        // All we need to do is shift the significand and apply the correct sign.
+        aSignificand >>= @intCast(S, 4 - aExponent);
+        return @bitCast(T, resultSign | aSignificand);
     }
 
     // Low three bits are round, guard, and sticky.
lib/std/special/compiler_rt/addXf3_test.zig
@@ -152,4 +152,6 @@ test "addxf3" {
     try test__addxf3(0x1.0fff_ffff_ffff_fffep+0, 0x1.8p-63, 0x3FFF_8800000000000000); // round down to even
     try test__addxf3(0x1.0fff_ffff_ffff_fffep+0, 0x1.9p-63, 0x3FFF_8800000000000001); // round up
     try test__addxf3(0x1.0fff_ffff_ffff_fffep+0, 0x2.0p-63, 0x3FFF_8800000000000001); // exact
+    try test__addxf3(0x0.ffff_ffff_ffff_fffcp-16382, 0x0.0000_0000_0000_0002p-16382, 0x0000_7FFFFFFFFFFFFFFF); // exact
+    try test__addxf3(0x0.1fff_ffff_ffff_fffcp-16382, 0x0.0000_0000_0000_0002p-16382, 0x0000_0FFFFFFFFFFFFFFF); // exact
 }
lib/std/special/compiler_rt/mulXf3.zig
@@ -152,6 +152,10 @@ fn mulXf3(comptime T: type, a: T, b: T) T {
         const sticky = wideShrWithTruncation(ZSignificand, &productHi, &productLo, shift);
         productLo |= @boolToInt(sticky);
         result = productHi;
+
+        // We include the integer bit so that rounding will carry to the exponent,
+        // but it will be removed later if the result is still denormal
+        if (significandBits != fractionalBits) result |= integerBit;
     } else {
         // Result is normal before rounding; insert the exponent.
         result = productHi & significandMask;
@@ -166,7 +170,11 @@ fn mulXf3(comptime T: type, a: T, b: T) T {
 
     // Restore any explicit integer bit, if it was rounded off
     if (significandBits != fractionalBits) {
-        if ((result >> significandBits) != 0) result |= integerBit;
+        if ((result >> significandBits) != 0) {
+            result |= integerBit;
+        } else {
+            result &= ~integerBit;
+        }
     }
 
     // Insert the sign of the result:
lib/std/special/compiler_rt/mulXf3_test.zig
@@ -105,6 +105,7 @@ test "multf3" {
 
     try test__multf3(0x1.0000_0000_0000_0000_0000_0000_0001p+0, 0x1.8p+5, 0x4004_8000_0000_0000, 0x0000_0000_0000_0002);
     try test__multf3(0x1.0000_0000_0000_0000_0000_0000_0002p+0, 0x1.8p+5, 0x4004_8000_0000_0000, 0x0000_0000_0000_0003);
+    try test__multf3(2.0, math.floatTrueMin(f128), 0x0000_0000_0000_0000, 0x0000_0000_0000_0002);
 }
 
 const qnan80 = @bitCast(f80, @bitCast(u80, math.nan(f80)) | (1 << (math.floatFractionalBits(f80) - 1)));
@@ -164,4 +165,7 @@ test "mulxf3" {
 
     try test__mulxf3(0x1.0000_0001p+0, 0x1.0000_0001p+0, 0x3FFF_8000_0001_0000_0000); // round down to even
     try test__mulxf3(0x1.0000_0001p+0, 0x1.0000_0001_0002p+0, 0x3FFF_8000_0001_0001_0001); // round up
+    try test__mulxf3(0x0.8000_0000_0000_0000p-16382, 2.0, 0x0001_8000_0000_0000_0000); // denormal -> normal
+    try test__mulxf3(0x0.7fff_ffff_ffff_fffep-16382, 0x2.0000_0000_0000_0008p0, 0x0001_8000_0000_0000_0000); // denormal -> normal
+    try test__mulxf3(0x0.7fff_ffff_ffff_fffep-16382, 0x1.0000_0000_0000_0000p0, 0x0000_3FFF_FFFF_FFFF_FFFF); // denormal -> denormal
 }
lib/std/special/compiler_rt/README.md
@@ -152,53 +152,53 @@ Bugs should be solved by trying to duplicate the bug upstream, if possible.
 - todo todo __fixsfsi      // convert a to i32, rounding towards zero
 - todo todo __fixdfsi      //
 - todo todo __fixtfsi      //
-- none none __fixxfsi      // missing
+- todo todo __fixxfsi      //
 - todo todo __fixsfdi      // convert a to i64, rounding towards zero
 - todo todo __fixdfdi      //
 - todo todo __fixtfdi      //
-- none none __fixxfdi      // missing
+- todo todo __fixxfdi      //
 - todo todo __fixsfti      // convert a to i128, rounding towards zero
 - todo todo __fixdfti      //
 - todo todo __fixtfdi      //
-- none none __fixxfti      // missing
+- todo todo __fixxfti      //
 
 - __fixunssfsi   // convert to u32, rounding towards zero. negative values become 0.
 - __fixunsdfsi   //
 - __fixunstfsi   //
-- __fixunsxfsi   // missing
+- __fixunsxfsi   //
 - __fixunssfdi   // convert to u64, rounding towards zero. negative values become 0.
 - __fixunsdfdi   //
 - __fixunstfdi   //
-- __fixunsxfdi   // missing
+- __fixunsxfdi   //
 - __fixunssfti   // convert to u128, rounding towards zero. negative values become 0.
 - __fixunsdfti   //
 - __fixunstfdi   //
-- __fixunsxfti   // missing
+- __fixunsxfti   //
 
 - __floatsisf    // convert i32 to floating point
 - __floatsidf    //
 - __floatsitf    //
-- __floatsixf    // missing
+- __floatsixf    //
 - __floatdisf    // convert i64 to floating point
 - __floatdidf    //
 - __floatditf    //
-- __floatdixf    // missing
+- __floatdixf    //
 - __floattisf    // convert i128 to floating point
 - __floattidf    //
-- __floattixf    // missing
+- __floattixf    //
 
-- __floatunsisf  // convert i32 to floating point
+- __floatunsisf  // convert u32 to floating point
 - __floatunsidf  //
 - __floatunsitf  //
-- __floatunsixf  // missing
-- __floatundisf  // convert i64 to floating point
+- __floatunsixf  //
+- __floatundisf  // convert u64 to floating point
 - __floatundidf  //
 - __floatunditf  //
-- __floatundixf  // missing
-- __floatuntisf  // convert i128 to floating point
+- __floatundixf  //
+- __floatuntisf  // convert u128 to floating point
 - __floatuntidf  //
 - __floatuntitf  //
-- __floatuntixf  // missing
+- __floatuntixf  //
 
 #### Float Comparison
 - __cmpsf2       // return (a<b)=>-1,(a==b)=>0,(a>b)=>1,Nan=>1 dont rely on this
@@ -242,11 +242,11 @@ Bugs should be solved by trying to duplicate the bug upstream, if possible.
 - __mulsf3       // a * b
 - __muldf3       // a * b
 - __multf3       // a * b
-- __mulxf3       // a * b missing
+- __mulxf3       // a * b
 - __divsf3       // a / b
 - __divdf3       // a / b
 - __divtf3       // a / b
-- __divxf3       // a / b missing
+- __divxf3       // a / b
 - __negsf2       // -a symbol-level compatibility: libgcc uses this for the rl78
 - __negdf2       // -a unnecessary: can be lowered directly to a xor
 - __negtf2       // -a