master
1const std = @import("std");
2const common = @import("common.zig");
3const FloatStream = @import("FloatStream.zig");
4const isEightDigits = common.isEightDigits;
5const Number = common.Number;
6
7/// Parse 8 digits, loaded as bytes in little-endian order.
8///
9/// This uses the trick where every digit is in [0x030, 0x39],
10/// and therefore can be parsed in 3 multiplications, much
11/// faster than the normal 8.
12///
13/// This is based off the algorithm described in "Fast numeric string to
14/// int", available here: <https://johnnylee-sde.github.io/Fast-numeric-string-to-int/>.
15fn parse8Digits(v_: u64) u64 {
16 var v = v_;
17 const mask = 0x0000_00ff_0000_00ff;
18 const mul1 = 0x000f_4240_0000_0064;
19 const mul2 = 0x0000_2710_0000_0001;
20 v -= 0x3030_3030_3030_3030;
21 v = (v * 10) + (v >> 8); // will not overflow, fits in 63 bits
22 const v1 = (v & mask) *% mul1;
23 const v2 = ((v >> 16) & mask) *% mul2;
24 return @as(u64, @as(u32, @truncate((v1 +% v2) >> 32)));
25}
26
27/// Parse digits until a non-digit character is found.
28fn tryParseDigits(comptime T: type, stream: *FloatStream, x: *T, comptime base: u8) void {
29 // Try to parse 8 digits at a time, using an optimized algorithm.
30 // This only supports decimal digits.
31 if (base == 10) {
32 while (stream.hasLen(8)) {
33 const v = stream.readU64Unchecked();
34 if (!isEightDigits(v)) {
35 break;
36 }
37
38 x.* = x.* *% 1_0000_0000 +% parse8Digits(v);
39 stream.advance(8);
40 }
41 }
42
43 while (stream.scanDigit(base)) |digit| {
44 x.* *%= base;
45 x.* +%= digit;
46 }
47}
48
49fn min_n_digit_int(comptime T: type, digit_count: usize) T {
50 var n: T = 1;
51 var i: usize = 1;
52 while (i < digit_count) : (i += 1) n *= 10;
53 return n;
54}
55
56/// Parse up to N digits
57fn tryParseNDigits(comptime T: type, stream: *FloatStream, x: *T, comptime base: u8, comptime n: usize) void {
58 while (x.* < min_n_digit_int(T, n)) {
59 if (stream.scanDigit(base)) |digit| {
60 x.* *%= base;
61 x.* +%= digit;
62 } else {
63 break;
64 }
65 }
66}
67
68/// Parse the scientific notation component of a float.
69fn parseScientific(stream: *FloatStream) ?i64 {
70 var exponent: i64 = 0;
71 var negative = false;
72
73 if (stream.first()) |c| {
74 negative = c == '-';
75 if (c == '-' or c == '+') {
76 stream.advance(1);
77 }
78 }
79 if (stream.firstIsDigit(10)) {
80 while (stream.scanDigit(10)) |digit| {
81 // no overflows here, saturate well before overflow
82 if (exponent < 0x1000_0000) {
83 exponent = 10 * exponent + digit;
84 }
85 }
86
87 return if (negative) -exponent else exponent;
88 }
89
90 return null;
91}
92
93const ParseInfo = struct {
94 // 10 or 16
95 base: u8,
96 // 10^19 fits in u64, 16^16 fits in u64
97 max_mantissa_digits: usize,
98 // e.g. e or p (E and P also checked)
99 exp_char_lower: u8,
100};
101
102fn parsePartialNumberBase(comptime T: type, stream: *FloatStream, negative: bool, n: *usize, comptime info: ParseInfo) ?Number(T) {
103 std.debug.assert(info.base == 10 or info.base == 16);
104 const MantissaT = common.mantissaType(T);
105
106 // parse initial digits before dot
107 var mantissa: MantissaT = 0;
108 tryParseDigits(MantissaT, stream, &mantissa, info.base);
109 const int_end = stream.offsetTrue();
110 var n_digits = @as(isize, @intCast(stream.offsetTrue()));
111
112 // handle dot with the following digits
113 var exponent: i64 = 0;
114 if (stream.firstIs(".")) {
115 stream.advance(1);
116 const marker = stream.offsetTrue();
117 tryParseDigits(MantissaT, stream, &mantissa, info.base);
118 const n_after_dot = stream.offsetTrue() - marker;
119 exponent = -@as(i64, @intCast(n_after_dot));
120 n_digits += @as(isize, @intCast(n_after_dot));
121 }
122
123 // adjust required shift to offset mantissa for base-16 (2^4)
124 if (info.base == 16) {
125 exponent *= 4;
126 }
127
128 if (n_digits == 0) {
129 return null;
130 }
131
132 // handle scientific format
133 var exp_number: i64 = 0;
134 if (stream.firstIsLower(&.{info.exp_char_lower})) {
135 stream.advance(1);
136 exp_number = parseScientific(stream) orelse return null;
137 exponent += exp_number;
138 }
139
140 const len = stream.offset; // length must be complete parsed length
141 n.* += len;
142
143 if (stream.underscore_count > 0 and !validUnderscores(stream.slice, info.base)) {
144 return null;
145 }
146
147 // common case with not many digits
148 if (n_digits <= info.max_mantissa_digits) {
149 return Number(T){
150 .exponent = exponent,
151 .mantissa = mantissa,
152 .negative = negative,
153 .many_digits = false,
154 .hex = info.base == 16,
155 };
156 }
157
158 n_digits -= info.max_mantissa_digits;
159 var many_digits = false;
160 stream.reset(); // re-parse from beginning
161 while (stream.firstIs("0._")) {
162 // '0' = '.' + 2
163 const next = stream.firstUnchecked();
164 if (next != '_') {
165 n_digits -= @as(isize, @intCast(next -| ('0' - 1)));
166 } else {
167 stream.underscore_count += 1;
168 }
169 stream.advance(1);
170 }
171 if (n_digits > 0) {
172 // at this point we have more than max_mantissa_digits significant digits, let's try again
173 many_digits = true;
174 mantissa = 0;
175 stream.reset();
176 tryParseNDigits(MantissaT, stream, &mantissa, info.base, info.max_mantissa_digits);
177
178 exponent = blk: {
179 if (mantissa >= min_n_digit_int(MantissaT, info.max_mantissa_digits)) {
180 // big int
181 break :blk @as(i64, @intCast(int_end)) - @as(i64, @intCast(stream.offsetTrue()));
182 } else {
183 // the next byte must be present and be '.'
184 // We know this is true because we had more than 19
185 // digits previously, so we overflowed a 64-bit integer,
186 // but parsing only the integral digits produced less
187 // than 19 digits. That means we must have a decimal
188 // point, and at least 1 fractional digit.
189 stream.advance(1);
190 const marker = stream.offsetTrue();
191 tryParseNDigits(MantissaT, stream, &mantissa, info.base, info.max_mantissa_digits);
192 break :blk @as(i64, @intCast(marker)) - @as(i64, @intCast(stream.offsetTrue()));
193 }
194 };
195 if (info.base == 16) {
196 exponent *= 4;
197 }
198 // add back the explicit part
199 exponent += exp_number;
200 }
201
202 return Number(T){
203 .exponent = exponent,
204 .mantissa = mantissa,
205 .negative = negative,
206 .many_digits = many_digits,
207 .hex = info.base == 16,
208 };
209}
210
211/// Parse a partial, non-special floating point number.
212///
213/// This creates a representation of the float as the
214/// significant digits and the decimal exponent.
215fn parsePartialNumber(comptime T: type, s: []const u8, negative: bool, n: *usize) ?Number(T) {
216 std.debug.assert(s.len != 0);
217 const MantissaT = common.mantissaType(T);
218 n.* = 0;
219
220 if (s.len >= 2 and s[0] == '0' and std.ascii.toLower(s[1]) == 'x') {
221 var stream = FloatStream.init(s[2..]);
222 n.* += 2;
223 return parsePartialNumberBase(T, &stream, negative, n, .{
224 .base = 16,
225 .max_mantissa_digits = if (MantissaT == u64) 16 else 32,
226 .exp_char_lower = 'p',
227 });
228 } else {
229 var stream = FloatStream.init(s);
230 return parsePartialNumberBase(T, &stream, negative, n, .{
231 .base = 10,
232 .max_mantissa_digits = if (MantissaT == u64) 19 else 38,
233 .exp_char_lower = 'e',
234 });
235 }
236}
237
238pub fn parseNumber(comptime T: type, s: []const u8, negative: bool) ?Number(T) {
239 var consumed: usize = 0;
240 if (parsePartialNumber(T, s, negative, &consumed)) |number| {
241 // must consume entire float (no trailing data)
242 if (s.len == consumed) {
243 return number;
244 }
245 }
246 return null;
247}
248
249fn parsePartialInfOrNan(comptime T: type, s: []const u8, negative: bool, n: *usize) ?T {
250 // inf/infinity; infxxx should only consume inf.
251 if (std.ascii.startsWithIgnoreCase(s, "inf")) {
252 n.* = 3;
253 if (std.ascii.startsWithIgnoreCase(s[3..], "inity")) {
254 n.* = 8;
255 }
256
257 return if (!negative) std.math.inf(T) else -std.math.inf(T);
258 }
259
260 if (std.ascii.startsWithIgnoreCase(s, "nan")) {
261 n.* = 3;
262 return std.math.nan(T);
263 }
264
265 return null;
266}
267
268pub fn parseInfOrNan(comptime T: type, s: []const u8, negative: bool) ?T {
269 var consumed: usize = 0;
270 if (parsePartialInfOrNan(T, s, negative, &consumed)) |special| {
271 if (s.len == consumed) {
272 return special;
273 }
274 }
275 return null;
276}
277
278pub fn validUnderscores(s: []const u8, comptime base: u8) bool {
279 var i: usize = 0;
280 while (i < s.len) : (i += 1) {
281 if (s[i] == '_') {
282 // underscore at start of end
283 if (i == 0 or i + 1 == s.len) {
284 return false;
285 }
286 // consecutive underscores
287 if (!common.isDigit(s[i - 1], base) or !common.isDigit(s[i + 1], base)) {
288 return false;
289 }
290
291 // next is guaranteed a digit, skip an extra
292 i += 1;
293 }
294 }
295
296 return true;
297}