master
  1const std = @import("std");
  2const common = @import("common.zig");
  3const FloatStream = @import("FloatStream.zig");
  4const isEightDigits = common.isEightDigits;
  5const Number = common.Number;
  6
  7/// Parse 8 digits, loaded as bytes in little-endian order.
  8///
  9/// This uses the trick where every digit is in [0x030, 0x39],
 10/// and therefore can be parsed in 3 multiplications, much
 11/// faster than the normal 8.
 12///
 13/// This is based off the algorithm described in "Fast numeric string to
 14/// int", available here: <https://johnnylee-sde.github.io/Fast-numeric-string-to-int/>.
 15fn parse8Digits(v_: u64) u64 {
 16    var v = v_;
 17    const mask = 0x0000_00ff_0000_00ff;
 18    const mul1 = 0x000f_4240_0000_0064;
 19    const mul2 = 0x0000_2710_0000_0001;
 20    v -= 0x3030_3030_3030_3030;
 21    v = (v * 10) + (v >> 8); // will not overflow, fits in 63 bits
 22    const v1 = (v & mask) *% mul1;
 23    const v2 = ((v >> 16) & mask) *% mul2;
 24    return @as(u64, @as(u32, @truncate((v1 +% v2) >> 32)));
 25}
 26
 27/// Parse digits until a non-digit character is found.
 28fn tryParseDigits(comptime T: type, stream: *FloatStream, x: *T, comptime base: u8) void {
 29    // Try to parse 8 digits at a time, using an optimized algorithm.
 30    // This only supports decimal digits.
 31    if (base == 10) {
 32        while (stream.hasLen(8)) {
 33            const v = stream.readU64Unchecked();
 34            if (!isEightDigits(v)) {
 35                break;
 36            }
 37
 38            x.* = x.* *% 1_0000_0000 +% parse8Digits(v);
 39            stream.advance(8);
 40        }
 41    }
 42
 43    while (stream.scanDigit(base)) |digit| {
 44        x.* *%= base;
 45        x.* +%= digit;
 46    }
 47}
 48
 49fn min_n_digit_int(comptime T: type, digit_count: usize) T {
 50    var n: T = 1;
 51    var i: usize = 1;
 52    while (i < digit_count) : (i += 1) n *= 10;
 53    return n;
 54}
 55
 56/// Parse up to N digits
 57fn tryParseNDigits(comptime T: type, stream: *FloatStream, x: *T, comptime base: u8, comptime n: usize) void {
 58    while (x.* < min_n_digit_int(T, n)) {
 59        if (stream.scanDigit(base)) |digit| {
 60            x.* *%= base;
 61            x.* +%= digit;
 62        } else {
 63            break;
 64        }
 65    }
 66}
 67
 68/// Parse the scientific notation component of a float.
 69fn parseScientific(stream: *FloatStream) ?i64 {
 70    var exponent: i64 = 0;
 71    var negative = false;
 72
 73    if (stream.first()) |c| {
 74        negative = c == '-';
 75        if (c == '-' or c == '+') {
 76            stream.advance(1);
 77        }
 78    }
 79    if (stream.firstIsDigit(10)) {
 80        while (stream.scanDigit(10)) |digit| {
 81            // no overflows here, saturate well before overflow
 82            if (exponent < 0x1000_0000) {
 83                exponent = 10 * exponent + digit;
 84            }
 85        }
 86
 87        return if (negative) -exponent else exponent;
 88    }
 89
 90    return null;
 91}
 92
 93const ParseInfo = struct {
 94    // 10 or 16
 95    base: u8,
 96    // 10^19 fits in u64, 16^16 fits in u64
 97    max_mantissa_digits: usize,
 98    // e.g. e or p (E and P also checked)
 99    exp_char_lower: u8,
100};
101
102fn parsePartialNumberBase(comptime T: type, stream: *FloatStream, negative: bool, n: *usize, comptime info: ParseInfo) ?Number(T) {
103    std.debug.assert(info.base == 10 or info.base == 16);
104    const MantissaT = common.mantissaType(T);
105
106    // parse initial digits before dot
107    var mantissa: MantissaT = 0;
108    tryParseDigits(MantissaT, stream, &mantissa, info.base);
109    const int_end = stream.offsetTrue();
110    var n_digits = @as(isize, @intCast(stream.offsetTrue()));
111
112    // handle dot with the following digits
113    var exponent: i64 = 0;
114    if (stream.firstIs(".")) {
115        stream.advance(1);
116        const marker = stream.offsetTrue();
117        tryParseDigits(MantissaT, stream, &mantissa, info.base);
118        const n_after_dot = stream.offsetTrue() - marker;
119        exponent = -@as(i64, @intCast(n_after_dot));
120        n_digits += @as(isize, @intCast(n_after_dot));
121    }
122
123    // adjust required shift to offset mantissa for base-16 (2^4)
124    if (info.base == 16) {
125        exponent *= 4;
126    }
127
128    if (n_digits == 0) {
129        return null;
130    }
131
132    // handle scientific format
133    var exp_number: i64 = 0;
134    if (stream.firstIsLower(&.{info.exp_char_lower})) {
135        stream.advance(1);
136        exp_number = parseScientific(stream) orelse return null;
137        exponent += exp_number;
138    }
139
140    const len = stream.offset; // length must be complete parsed length
141    n.* += len;
142
143    if (stream.underscore_count > 0 and !validUnderscores(stream.slice, info.base)) {
144        return null;
145    }
146
147    // common case with not many digits
148    if (n_digits <= info.max_mantissa_digits) {
149        return Number(T){
150            .exponent = exponent,
151            .mantissa = mantissa,
152            .negative = negative,
153            .many_digits = false,
154            .hex = info.base == 16,
155        };
156    }
157
158    n_digits -= info.max_mantissa_digits;
159    var many_digits = false;
160    stream.reset(); // re-parse from beginning
161    while (stream.firstIs("0._")) {
162        // '0' = '.' + 2
163        const next = stream.firstUnchecked();
164        if (next != '_') {
165            n_digits -= @as(isize, @intCast(next -| ('0' - 1)));
166        } else {
167            stream.underscore_count += 1;
168        }
169        stream.advance(1);
170    }
171    if (n_digits > 0) {
172        // at this point we have more than max_mantissa_digits significant digits, let's try again
173        many_digits = true;
174        mantissa = 0;
175        stream.reset();
176        tryParseNDigits(MantissaT, stream, &mantissa, info.base, info.max_mantissa_digits);
177
178        exponent = blk: {
179            if (mantissa >= min_n_digit_int(MantissaT, info.max_mantissa_digits)) {
180                // big int
181                break :blk @as(i64, @intCast(int_end)) - @as(i64, @intCast(stream.offsetTrue()));
182            } else {
183                // the next byte must be present and be '.'
184                // We know this is true because we had more than 19
185                // digits previously, so we overflowed a 64-bit integer,
186                // but parsing only the integral digits produced less
187                // than 19 digits. That means we must have a decimal
188                // point, and at least 1 fractional digit.
189                stream.advance(1);
190                const marker = stream.offsetTrue();
191                tryParseNDigits(MantissaT, stream, &mantissa, info.base, info.max_mantissa_digits);
192                break :blk @as(i64, @intCast(marker)) - @as(i64, @intCast(stream.offsetTrue()));
193            }
194        };
195        if (info.base == 16) {
196            exponent *= 4;
197        }
198        // add back the explicit part
199        exponent += exp_number;
200    }
201
202    return Number(T){
203        .exponent = exponent,
204        .mantissa = mantissa,
205        .negative = negative,
206        .many_digits = many_digits,
207        .hex = info.base == 16,
208    };
209}
210
211/// Parse a partial, non-special floating point number.
212///
213/// This creates a representation of the float as the
214/// significant digits and the decimal exponent.
215fn parsePartialNumber(comptime T: type, s: []const u8, negative: bool, n: *usize) ?Number(T) {
216    std.debug.assert(s.len != 0);
217    const MantissaT = common.mantissaType(T);
218    n.* = 0;
219
220    if (s.len >= 2 and s[0] == '0' and std.ascii.toLower(s[1]) == 'x') {
221        var stream = FloatStream.init(s[2..]);
222        n.* += 2;
223        return parsePartialNumberBase(T, &stream, negative, n, .{
224            .base = 16,
225            .max_mantissa_digits = if (MantissaT == u64) 16 else 32,
226            .exp_char_lower = 'p',
227        });
228    } else {
229        var stream = FloatStream.init(s);
230        return parsePartialNumberBase(T, &stream, negative, n, .{
231            .base = 10,
232            .max_mantissa_digits = if (MantissaT == u64) 19 else 38,
233            .exp_char_lower = 'e',
234        });
235    }
236}
237
238pub fn parseNumber(comptime T: type, s: []const u8, negative: bool) ?Number(T) {
239    var consumed: usize = 0;
240    if (parsePartialNumber(T, s, negative, &consumed)) |number| {
241        // must consume entire float (no trailing data)
242        if (s.len == consumed) {
243            return number;
244        }
245    }
246    return null;
247}
248
249fn parsePartialInfOrNan(comptime T: type, s: []const u8, negative: bool, n: *usize) ?T {
250    // inf/infinity; infxxx should only consume inf.
251    if (std.ascii.startsWithIgnoreCase(s, "inf")) {
252        n.* = 3;
253        if (std.ascii.startsWithIgnoreCase(s[3..], "inity")) {
254            n.* = 8;
255        }
256
257        return if (!negative) std.math.inf(T) else -std.math.inf(T);
258    }
259
260    if (std.ascii.startsWithIgnoreCase(s, "nan")) {
261        n.* = 3;
262        return std.math.nan(T);
263    }
264
265    return null;
266}
267
268pub fn parseInfOrNan(comptime T: type, s: []const u8, negative: bool) ?T {
269    var consumed: usize = 0;
270    if (parsePartialInfOrNan(T, s, negative, &consumed)) |special| {
271        if (s.len == consumed) {
272            return special;
273        }
274    }
275    return null;
276}
277
278pub fn validUnderscores(s: []const u8, comptime base: u8) bool {
279    var i: usize = 0;
280    while (i < s.len) : (i += 1) {
281        if (s[i] == '_') {
282            // underscore at start of end
283            if (i == 0 or i + 1 == s.len) {
284                return false;
285            }
286            // consecutive underscores
287            if (!common.isDigit(s[i - 1], base) or !common.isDigit(s[i + 1], base)) {
288                return false;
289            }
290
291            // next is guaranteed a digit, skip an extra
292            i += 1;
293        }
294    }
295
296    return true;
297}