Commit 55c58f226d

LemonBoy <thatlemon@gmail.com>
2021-04-28 21:01:15
std: Add a parser for hexadecimal floating point numbers
Should be good enough to unblock progress on the stage2 compiler. Unifying this parser and the regular one (and perhaps rewrite it, #2207) is left as an exercise for the reader.
1 parent 86e129e
Changed files (2)
lib/std/fmt/parse_hex_float.zig
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2015-2021 Zig Contributors
+// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
+// The MIT license requires this copyright notice to be included in all copies
+// and substantial portions of the software.const std = @import("std");
+//
+// The rounding logic is inspired by LLVM's APFloat and Go's atofHex
+// implementation.
+
+const std = @import("std");
+const ascii = std.ascii;
+const fmt = std.fmt;
+const math = std.math;
+const testing = std.testing;
+
+const assert = std.debug.assert;
+
+pub fn parseHexFloat(comptime T: type, s: []const u8) !T {
+    assert(@typeInfo(T) == .Float);
+
+    const IntT = std.meta.Int(.unsigned, @typeInfo(T).Float.bits);
+
+    const mantissa_bits = math.floatMantissaBits(T);
+    const exponent_bits = math.floatExponentBits(T);
+
+    const sign_shift = mantissa_bits + exponent_bits;
+
+    const exponent_bias = (1 << (exponent_bits - 1)) - 1;
+    const exponent_min = 1 - exponent_bias;
+    const exponent_max = exponent_bias;
+
+    if (s.len == 0)
+        return error.InvalidCharacter;
+
+    if (ascii.eqlIgnoreCase(s, "nan")) {
+        return math.nan(T);
+    } else if (ascii.eqlIgnoreCase(s, "inf") or ascii.eqlIgnoreCase(s, "+inf")) {
+        return math.inf(T);
+    } else if (ascii.eqlIgnoreCase(s, "-inf")) {
+        return -math.inf(T);
+    }
+
+    var negative: bool = false;
+    var exp_negative: bool = false;
+
+    var mantissa: u128 = 0;
+    var exponent: i16 = 0;
+    var frac_scale: i16 = 0;
+
+    const State = enum {
+        MaybeSign,
+        Prefix,
+        LeadingIntegerDigit,
+        IntegerDigit,
+        MaybeDot,
+        LeadingFractionDigit,
+        FractionDigit,
+        ExpPrefix,
+        MaybeExpSign,
+        ExpDigit,
+    };
+
+    var state = State.MaybeSign;
+
+    var i: usize = 0;
+    while (i < s.len) {
+        const c = s[i];
+
+        switch (state) {
+            .MaybeSign => {
+                state = .Prefix;
+
+                if (c == '+') {
+                    i += 1;
+                } else if (c == '-') {
+                    negative = true;
+                    i += 1;
+                }
+            },
+            .Prefix => {
+                state = .LeadingIntegerDigit;
+
+                // Match both 0x and 0X.
+                if (i + 2 > s.len or s[i] != '0' or s[i + 1] | 32 != 'x')
+                    return error.InvalidCharacter;
+                i += 2;
+            },
+            .LeadingIntegerDigit => {
+                if (c == '0') {
+                    // Skip leading zeros.
+                    i += 1;
+                } else if (c == '_') {
+                    return error.InvalidCharacter;
+                } else {
+                    state = .IntegerDigit;
+                }
+            },
+            .IntegerDigit => {
+                if (ascii.isXDigit(c)) {
+                    if (mantissa >= math.maxInt(u128) / 16)
+                        return error.Overflow;
+                    mantissa *%= 16;
+                    mantissa += try fmt.charToDigit(c, 16);
+                    i += 1;
+                } else if (c == '_') {
+                    i += 1;
+                } else {
+                    state = .MaybeDot;
+                }
+            },
+            .MaybeDot => {
+                if (c == '.') {
+                    state = .LeadingFractionDigit;
+                    i += 1;
+                } else state = .ExpPrefix;
+            },
+            .LeadingFractionDigit => {
+                if (c == '_') {
+                    return error.InvalidCharacter;
+                } else state = .FractionDigit;
+            },
+            .FractionDigit => {
+                if (ascii.isXDigit(c)) {
+                    if (mantissa < math.maxInt(u128) / 16) {
+                        mantissa *%= 16;
+                        mantissa +%= try fmt.charToDigit(c, 16);
+                        frac_scale += 1;
+                    } else if (c != '0') {
+                        return error.Overflow;
+                    }
+                    i += 1;
+                } else if (c == '_') {
+                    i += 1;
+                } else {
+                    state = .ExpPrefix;
+                }
+            },
+            .ExpPrefix => {
+                state = .MaybeExpSign;
+                // Match both p and P.
+                if (c | 32 != 'p')
+                    return error.InvalidCharacter;
+                i += 1;
+            },
+            .MaybeExpSign => {
+                state = .ExpDigit;
+
+                if (c == '+') {
+                    i += 1;
+                } else if (c == '-') {
+                    exp_negative = true;
+                    i += 1;
+                }
+            },
+            .ExpDigit => {
+                if (ascii.isXDigit(c)) {
+                    if (exponent >= math.maxInt(i16) / 10)
+                        return error.Overflow;
+                    exponent *%= 10;
+                    exponent +%= try fmt.charToDigit(c, 10);
+                    i += 1;
+                } else if (c == '_') {
+                    i += 1;
+                } else {
+                    return error.InvalidCharacter;
+                }
+            },
+        }
+    }
+
+    if (exp_negative)
+        exponent *= -1;
+
+    // Bring the decimal part to the left side of the decimal dot.
+    exponent -= frac_scale * 4;
+
+    if (mantissa == 0) {
+        // Signed zero.
+        return if (negative) -0.0 else 0.0;
+    }
+
+    // Divide by 2^mantissa_bits to right-align the mantissa in the fractional
+    // part.
+    exponent += mantissa_bits;
+
+    // Keep around two extra bits to correctly round any value that doesn't fit
+    // the available mantissa bits. The result LSB serves as Guard bit, the
+    // following one is the Round bit and the last one is the Sticky bit,
+    // computed by OR-ing all the dropped bits.
+
+    // Normalize by aligning the implicit one bit.
+    while (mantissa >> (mantissa_bits + 2) == 0) {
+        mantissa <<= 1;
+        exponent -= 1;
+    }
+
+    // Normalize again by dropping the excess precision.
+    // Note that the discarded bits are folded into the Sticky bit.
+    while (mantissa >> (mantissa_bits + 2 + 1) != 0) {
+        mantissa = mantissa >> 1 | (mantissa & 1);
+        exponent += 1;
+    }
+
+    // Very small numbers can be possibly represented as denormals, reduce the
+    // exponent as much as possible.
+    while (mantissa != 0 and exponent < exponent_min - 2) {
+        mantissa = mantissa >> 1 | (mantissa & 1);
+        exponent += 1;
+    }
+
+    // There are two cases to handle:
+    // - We've truncated more than 0.5ULP (R=S=1), increase the mantissa.
+    // - We've truncated exactly 0.5ULP (R=1 S=0), increase the mantissa if the
+    //   result is odd (G=1).
+    // The two checks can be neatly folded as follows.
+    mantissa |= @boolToInt(mantissa & 0b100 != 0);
+    mantissa += 1;
+
+    mantissa >>= 2;
+    exponent += 2;
+
+    if (mantissa & (1 << (mantissa_bits + 1)) != 0) {
+        // Renormalize, if the exponent overflows we'll catch that below.
+        mantissa >>= 1;
+        exponent += 1;
+    }
+
+    if (mantissa >> mantissa_bits == 0) {
+        // This is a denormal number, the biased exponent is zero.
+        exponent = -exponent_bias;
+    }
+
+    if (exponent > exponent_max) {
+        // Overflow, return +inf.
+        return math.inf(T);
+    }
+
+    // Remove the implicit bit.
+    mantissa &= @as(u128, (1 << mantissa_bits) - 1);
+
+    const raw: IntT =
+        (if (negative) @as(IntT, 1) << sign_shift else 0) |
+        @as(IntT, @bitCast(u16, exponent + exponent_bias)) << mantissa_bits |
+        @truncate(IntT, mantissa);
+
+    return @bitCast(T, raw);
+}
+
+test "special" {
+    testing.expect(math.isNan(try parseHexFloat(f32, "nAn")));
+    testing.expect(math.isPositiveInf(try parseHexFloat(f32, "iNf")));
+    testing.expect(math.isPositiveInf(try parseHexFloat(f32, "+Inf")));
+    testing.expect(math.isNegativeInf(try parseHexFloat(f32, "-iNf")));
+}
+test "zero" {
+    testing.expectEqual(@as(f32, 0.0), try parseHexFloat(f32, "0x0"));
+    testing.expectEqual(@as(f32, 0.0), try parseHexFloat(f32, "-0x0"));
+    testing.expectEqual(@as(f32, 0.0), try parseHexFloat(f32, "0x0p42"));
+    testing.expectEqual(@as(f32, 0.0), try parseHexFloat(f32, "-0x0.00000p42"));
+    testing.expectEqual(@as(f32, 0.0), try parseHexFloat(f32, "0x0.00000p666"));
+}
+
+test "f16" {
+    const Case = struct { s: []const u8, v: f16 };
+    const cases: []const Case = &[_]Case{
+        .{ .s = "0x1p0", .v = 1.0 },
+        .{ .s = "-0x1p-1", .v = -0.5 },
+        .{ .s = "0x10p+10", .v = 16384.0 },
+        .{ .s = "0x10p-10", .v = 0.015625 },
+        // Max normalized value.
+        .{ .s = "0x1.ffcp+15", .v = math.f16_max },
+        .{ .s = "-0x1.ffcp+15", .v = -math.f16_max },
+        // Min normalized value.
+        .{ .s = "0x1p-14", .v = math.f16_min },
+        .{ .s = "-0x1p-14", .v = -math.f16_min },
+        // Min denormal value.
+        .{ .s = "0x1p-24", .v = math.f16_true_min },
+        .{ .s = "-0x1p-24", .v = -math.f16_true_min },
+    };
+
+    for (cases) |case| {
+        testing.expectEqual(case.v, try parseHexFloat(f16, case.s));
+    }
+}
+test "f32" {
+    const Case = struct { s: []const u8, v: f32 };
+    const cases: []const Case = &[_]Case{
+        .{ .s = "0x1p0", .v = 1.0 },
+        .{ .s = "-0x1p-1", .v = -0.5 },
+        .{ .s = "0x10p+10", .v = 16384.0 },
+        .{ .s = "0x10p-10", .v = 0.015625 },
+        .{ .s = "0x0.ffffffp128", .v = 0x0.ffffffp128 },
+        .{ .s = "0x0.1234570p-125", .v = 0x0.1234570p-125 },
+        // Max normalized value.
+        .{ .s = "0x1.fffffeP+127", .v = math.f32_max },
+        .{ .s = "-0x1.fffffeP+127", .v = -math.f32_max },
+        // Min normalized value.
+        .{ .s = "0x1p-126", .v = math.f32_min },
+        .{ .s = "-0x1p-126", .v = -math.f32_min },
+        // Min denormal value.
+        .{ .s = "0x1P-149", .v = math.f32_true_min },
+        .{ .s = "-0x1P-149", .v = -math.f32_true_min },
+    };
+
+    for (cases) |case| {
+        testing.expectEqual(case.v, try parseHexFloat(f32, case.s));
+    }
+}
+test "f64" {
+    const Case = struct { s: []const u8, v: f64 };
+    const cases: []const Case = &[_]Case{
+        .{ .s = "0x1p0", .v = 1.0 },
+        .{ .s = "-0x1p-1", .v = -0.5 },
+        .{ .s = "0x10p+10", .v = 16384.0 },
+        .{ .s = "0x10p-10", .v = 0.015625 },
+        // Max normalized value.
+        .{ .s = "0x1.fffffffffffffp+1023", .v = math.f64_max },
+        .{ .s = "-0x1.fffffffffffffp1023", .v = -math.f64_max },
+        // Min normalized value.
+        .{ .s = "0x1p-1022", .v = math.f64_min },
+        .{ .s = "-0x1p-1022", .v = -math.f64_min },
+        // Min denormalized value.
+        .{ .s = "0x1p-1074", .v = math.f64_true_min },
+        .{ .s = "-0x1p-1074", .v = -math.f64_true_min },
+    };
+
+    for (cases) |case| {
+        testing.expectEqual(case.v, try parseHexFloat(f64, case.s));
+    }
+}
+test "f128" {
+    const Case = struct { s: []const u8, v: f128 };
+    const cases: []const Case = &[_]Case{
+        .{ .s = "0x1p0", .v = 1.0 },
+        .{ .s = "-0x1p-1", .v = -0.5 },
+        .{ .s = "0x10p+10", .v = 16384.0 },
+        .{ .s = "0x10p-10", .v = 0.015625 },
+        // Max normalized value.
+        .{ .s = "0xf.fffffffffffffffffffffffffff8p+16380", .v = math.f128_max },
+        .{ .s = "-0xf.fffffffffffffffffffffffffff8p+16380", .v = -math.f128_max },
+        // Min normalized value.
+        .{ .s = "0x1p-16382", .v = math.f128_min },
+        .{ .s = "-0x1p-16382", .v = -math.f128_min },
+        // // Min denormalized value.
+        .{ .s = "0x1p-16494", .v = math.f128_true_min },
+        .{ .s = "-0x1p-16494", .v = -math.f128_true_min },
+    };
+
+    for (cases) |case| {
+        testing.expectEqual(@bitCast(u128, case.v), @bitCast(u128, try parseHexFloat(f128, case.s)));
+    }
+}
lib/std/fmt.zig
@@ -1506,9 +1506,11 @@ test "parseUnsigned" {
 }
 
 pub const parseFloat = @import("fmt/parse_float.zig").parseFloat;
+pub const parseHexFloat = @import("fmt/parse_hex_float.zig").parseHexFloat;
 
-test "parseFloat" {
+test {
     _ = @import("fmt/parse_float.zig");
+    _ = @import("fmt/parse_hex_float.zig");
 }
 
 pub fn charToDigit(c: u8, radix: u8) (error{InvalidCharacter}!u8) {