master
1const std = @import("std");
2const math = std.math;
3const common = @import("common.zig");
4const BiasedFp = common.BiasedFp;
5const Decimal = @import("decimal.zig").Decimal;
6const mantissaType = common.mantissaType;
7
8const max_shift = 60;
9const num_powers = 19;
10const powers = [_]u8{ 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39, 43, 46, 49, 53, 56, 59 };
11
12pub fn getShift(n: usize) usize {
13 return if (n < num_powers) powers[n] else max_shift;
14}
15
16/// Parse the significant digits and biased, binary exponent of a float.
17///
18/// This is a fallback algorithm that uses a big-integer representation
19/// of the float, and therefore is considerably slower than faster
20/// approximations. However, it will always determine how to round
21/// the significant digits to the nearest machine float, allowing
22/// use to handle near half-way cases.
23///
24/// Near half-way cases are halfway between two consecutive machine floats.
25/// For example, the float `16777217.0` has a bitwise representation of
26/// `100000000000000000000000 1`. Rounding to a single-precision float,
27/// the trailing `1` is truncated. Using round-nearest, tie-even, any
28/// value above `16777217.0` must be rounded up to `16777218.0`, while
29/// any value before or equal to `16777217.0` must be rounded down
30/// to `16777216.0`. These near-halfway conversions therefore may require
31/// a large number of digits to unambiguously determine how to round.
32///
33/// The algorithms described here are based on "Processing Long Numbers Quickly",
34/// available here: <https://arxiv.org/pdf/2101.11408.pdf#section.11>.
35///
36/// Note that this function needs a lot of stack space and is marked
37/// cold to hint against inlining into the caller.
38pub fn convertSlow(comptime T: type, s: []const u8) BiasedFp(T) {
39 @branchHint(.cold);
40
41 const MantissaT = mantissaType(T);
42 const min_exponent = -(1 << (math.floatExponentBits(T) - 1)) + 1;
43 const infinite_power = (1 << math.floatExponentBits(T)) - 1;
44 const fractional_bits = math.floatFractionalBits(T);
45
46 var d = Decimal(T).parse(s); // no need to recheck underscores
47 if (d.num_digits == 0 or d.decimal_point < Decimal(T).min_exponent) {
48 return BiasedFp(T).zero();
49 } else if (d.decimal_point >= Decimal(T).max_exponent) {
50 return BiasedFp(T).inf(T);
51 }
52
53 var exp2: i32 = 0;
54 // Shift right toward (1/2 .. 1]
55 while (d.decimal_point > 0) {
56 const n = @as(usize, @intCast(d.decimal_point));
57 const shift = getShift(n);
58 d.rightShift(shift);
59 if (d.decimal_point < -Decimal(T).decimal_point_range) {
60 return BiasedFp(T).zero();
61 }
62 exp2 += @as(i32, @intCast(shift));
63 }
64 // Shift left toward (1/2 .. 1]
65 while (d.decimal_point <= 0) {
66 const shift = blk: {
67 if (d.decimal_point == 0) {
68 break :blk switch (d.digits[0]) {
69 5...9 => break,
70 0, 1 => @as(usize, 2),
71 else => 1,
72 };
73 } else {
74 const n = @as(usize, @intCast(-d.decimal_point));
75 break :blk getShift(n);
76 }
77 };
78 d.leftShift(shift);
79 if (d.decimal_point > Decimal(T).decimal_point_range) {
80 return BiasedFp(T).inf(T);
81 }
82 exp2 -= @as(i32, @intCast(shift));
83 }
84 // We are now in the range [1/2 .. 1] but the binary format uses [1 .. 2]
85 exp2 -= 1;
86 while (min_exponent + 1 > exp2) {
87 var n = @as(usize, @intCast((min_exponent + 1) - exp2));
88 if (n > max_shift) {
89 n = max_shift;
90 }
91 d.rightShift(n);
92 exp2 += @as(i32, @intCast(n));
93 }
94 if (exp2 - min_exponent >= infinite_power) {
95 return BiasedFp(T).inf(T);
96 }
97
98 // Shift the decimal to the hidden bit, and then round the value
99 // to get the high mantissa+1 bits.
100 d.leftShift(fractional_bits + 1);
101 var mantissa = d.round();
102 if (mantissa >= (@as(MantissaT, 1) << (fractional_bits + 1))) {
103 // Rounding up overflowed to the carry bit, need to
104 // shift back to the hidden bit.
105 d.rightShift(1);
106 exp2 += 1;
107 mantissa = d.round();
108 if ((exp2 - min_exponent) >= infinite_power) {
109 return BiasedFp(T).inf(T);
110 }
111 }
112 var power2 = exp2 - min_exponent;
113 if (mantissa < (@as(MantissaT, 1) << fractional_bits)) {
114 power2 -= 1;
115 }
116 // Zero out all the bits above the mantissa bits.
117 mantissa &= (@as(MantissaT, 1) << math.floatMantissaBits(T)) - 1;
118 return .{ .f = mantissa, .e = power2 };
119}