Commit 7aac21c6f5
Changed files (6)
lib
std
special
compiler_rt
test
stage1
behavior
doc/langref.html.in
@@ -885,6 +885,12 @@ const hex_int = 0xff;
const another_hex_int = 0xFF;
const octal_int = 0o755;
const binary_int = 0b11110000;
+
+// underscores may be placed between two digits as a visual separator
+const one_billion = 1_000_000_000;
+const binary_mask = 0b1_1111_1111;
+const permissions = 0o7_5_5;
+const big_address = 0xFF80_0000_0000_0000;
{#code_end#}
{#header_close#}
{#header_open|Runtime Integer Values#}
@@ -947,6 +953,11 @@ const yet_another = 123.0e+77;
const hex_floating_point = 0x103.70p-5;
const another_hex_float = 0x103.70;
const yet_another_hex_float = 0x103.70P-5;
+
+// underscores may be placed between two digits as a visual separator
+const lightspeed = 299_792_458.000_000;
+const nanosecond = 0.000_000_001;
+const more_hex = 0x1234_5678.9ABC_CDEFp-10;
{#code_end#}
<p>
There is no syntax for NaN, infinity, or negative infinity. For these special values,
lib/std/special/compiler_rt/floatundisf.zig
@@ -69,23 +69,23 @@ test "floatundisf" {
test__floatundisf(0, 0.0);
test__floatundisf(1, 1.0);
test__floatundisf(2, 2.0);
- test__floatundisf(0x7FFFFF8000000000, 0x1.FFFFFEp+62F);
- test__floatundisf(0x7FFFFF0000000000, 0x1.FFFFFCp+62F);
- test__floatundisf(0x8000008000000000, 0x1p+63F);
- test__floatundisf(0x8000010000000000, 0x1.000002p+63F);
- test__floatundisf(0x8000000000000000, 0x1p+63F);
- test__floatundisf(0x8000000000000001, 0x1p+63F);
- test__floatundisf(0xFFFFFFFFFFFFFFFE, 0x1p+64F);
- test__floatundisf(0xFFFFFFFFFFFFFFFF, 0x1p+64F);
- test__floatundisf(0x0007FB72E8000000, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72EA000000, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72EB000000, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72EBFFFFFF, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72EC000000, 0x1.FEDCBCp+50F);
- test__floatundisf(0x0007FB72E8000001, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72E6000000, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72E7000000, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72E7FFFFFF, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72E4000001, 0x1.FEDCBAp+50F);
- test__floatundisf(0x0007FB72E4000000, 0x1.FEDCB8p+50F);
+ test__floatundisf(0x7FFFFF8000000000, 0x1.FFFFFEp+62);
+ test__floatundisf(0x7FFFFF0000000000, 0x1.FFFFFCp+62);
+ test__floatundisf(0x8000008000000000, 0x1p+63);
+ test__floatundisf(0x8000010000000000, 0x1.000002p+63);
+ test__floatundisf(0x8000000000000000, 0x1p+63);
+ test__floatundisf(0x8000000000000001, 0x1p+63);
+ test__floatundisf(0xFFFFFFFFFFFFFFFE, 0x1p+64);
+ test__floatundisf(0xFFFFFFFFFFFFFFFF, 0x1p+64);
+ test__floatundisf(0x0007FB72E8000000, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72EA000000, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72EB000000, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72EBFFFFFF, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72EC000000, 0x1.FEDCBCp+50);
+ test__floatundisf(0x0007FB72E8000001, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72E6000000, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72E7000000, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72E7FFFFFF, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72E4000001, 0x1.FEDCBAp+50);
+ test__floatundisf(0x0007FB72E4000000, 0x1.FEDCB8p+50);
}
src/parse_f128.c
@@ -165,22 +165,36 @@ static long long scanexp(struct MuslFILE *f, int pok)
int x;
long long y;
int neg = 0;
-
+
c = shgetc(f);
if (c=='+' || c=='-') {
neg = (c=='-');
c = shgetc(f);
if (c-'0'>=10U && pok) shunget(f);
}
- if (c-'0'>=10U) {
+ if (c-'0'>=10U && c!='_') {
shunget(f);
return LLONG_MIN;
}
- for (x=0; c-'0'<10U && x<INT_MAX/10; c = shgetc(f))
- x = 10*x + c-'0';
- for (y=x; c-'0'<10U && y<LLONG_MAX/100; c = shgetc(f))
- y = 10*y + c-'0';
- for (; c-'0'<10U; c = shgetc(f));
+ for (x=0; ; c = shgetc(f)) {
+ if (c=='_') {
+ continue;
+ } else if (c-'0'<10U && x<INT_MAX/10) {
+ x = 10*x + c-'0';
+ } else {
+ break;
+ }
+ }
+ for (y=x; ; c = shgetc(f)) {
+ if (c=='_') {
+ continue;
+ } else if (c-'0'<10U && y<LLONG_MAX/100) {
+ y = 10*y + c-'0';
+ } else {
+ break;
+ }
+ }
+ for (; c-'0'<10U || c=='_'; c = shgetc(f));
shunget(f);
return neg ? -y : y;
}
@@ -450,16 +464,36 @@ static float128_t decfloat(struct MuslFILE *f, int c, int bits, int emin, int si
j=0;
k=0;
- /* Don't let leading zeros consume buffer space */
- for (; c=='0'; c = shgetc(f)) gotdig=1;
+ /* Don't let leading zeros/underscores consume buffer space */
+ for (; ; c = shgetc(f)) {
+ if (c=='_') {
+ continue;
+ } else if (c=='0') {
+ gotdig=1;
+ } else {
+ break;
+ }
+ }
+
if (c=='.') {
gotrad = 1;
- for (c = shgetc(f); c=='0'; c = shgetc(f)) gotdig=1, lrp--;
+ for (c = shgetc(f); ; c = shgetc(f)) {
+ if (c == '_') {
+ continue;
+ } else if (c=='0') {
+ gotdig=1;
+ lrp--;
+ } else {
+ break;
+ }
+ }
}
x[0] = 0;
- for (; c-'0'<10U || c=='.'; c = shgetc(f)) {
- if (c == '.') {
+ for (; c-'0'<10U || c=='.' || c=='_'; c = shgetc(f)) {
+ if (c == '_') {
+ continue;
+ } else if (c == '.') {
if (gotrad) break;
gotrad = 1;
lrp = dc;
@@ -773,18 +807,29 @@ static float128_t hexfloat(struct MuslFILE *f, int bits, int emin, int sign, int
c = shgetc(f);
- /* Skip leading zeros */
- for (; c=='0'; c = shgetc(f)) gotdig = 1;
+ /* Skip leading zeros/underscores. Only an actual '0' counts as a digit,
+  * matching the leading-zero loop in decfloat; a bare '_' must not set gotdig. */
+ for (; c=='0' || c=='_'; c = shgetc(f)) {
+ if (c=='0') gotdig = 1;
+ }
if (c=='.') {
gotrad = 1;
c = shgetc(f);
/* Count zeros after the radix point before significand */
- for (rp=0; c=='0'; c = shgetc(f), rp--) gotdig = 1;
+ for (rp=0; ; c = shgetc(f)) {
+ if (c == '_') {
+ continue;
+ } else if (c == '0') {
+ gotdig = 1;
+ rp--;
+ } else {
+ break;
+ }
+ }
}
- for (; c-'0'<10U || (c|32)-'a'<6U || c=='.'; c = shgetc(f)) {
- if (c=='.') {
+ for (; c-'0'<10U || (c|32)-'a'<6U || c=='.' || c=='_'; c = shgetc(f)) {
+ if (c=='_') {
+ continue;
+ } else if (c=='.') {
if (gotrad) break;
rp = dc;
gotrad = 1;
src/tokenizer.cpp
@@ -177,10 +177,13 @@ enum TokenizeState {
TokenizeStateSymbol,
TokenizeStateZero, // "0", which might lead to "0x"
TokenizeStateNumber, // "123", "0x123"
+ TokenizeStateNumberNoUnderscore, // "12_", "0x12_" next char must be digit
TokenizeStateNumberDot,
TokenizeStateFloatFraction, // "123.456", "0x123.456"
+ TokenizeStateFloatFractionNoUnderscore, // "123.45_", "0x123.45_"
TokenizeStateFloatExponentUnsigned, // "123.456e", "123e", "0x123p"
- TokenizeStateFloatExponentNumber, // "123.456e-", "123.456e5", "123.456e5e-5"
+ TokenizeStateFloatExponentNumber, // "123.456e7", "123.456e+7", "123.456e-7"
+ TokenizeStateFloatExponentNumberNoUnderscore, // "123.456e7_", "123.456e+7_", "123.456e-7_"
TokenizeStateString,
TokenizeStateStringEscape,
TokenizeStateStringEscapeUnicodeStart,
@@ -233,14 +236,10 @@ struct Tokenize {
Token *cur_tok;
Tokenization *out;
uint32_t radix;
- int32_t exp_add_amt;
- bool is_exp_negative;
+ bool is_trailing_underscore;
size_t char_code_index;
bool unicode;
uint32_t char_code;
- int exponent_in_bin_or_dec;
- BigInt specified_exponent;
- BigInt significand;
size_t remaining_code_units;
};
@@ -426,20 +425,16 @@ void tokenize(Buf *buf, Tokenization *out) {
case '0':
t.state = TokenizeStateZero;
begin_token(&t, TokenIdIntLiteral);
+ t.is_trailing_underscore = false;
t.radix = 10;
- t.exp_add_amt = 1;
- t.exponent_in_bin_or_dec = 0;
bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, 0);
- bigint_init_unsigned(&t.specified_exponent, 0);
break;
case DIGIT_NON_ZERO:
t.state = TokenizeStateNumber;
begin_token(&t, TokenIdIntLiteral);
+ t.is_trailing_underscore = false;
t.radix = 10;
- t.exp_add_amt = 1;
- t.exponent_in_bin_or_dec = 0;
bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, get_digit_value(c));
- bigint_init_unsigned(&t.specified_exponent, 0);
break;
case '"':
begin_token(&t, TokenIdStringLiteral);
@@ -1189,17 +1184,15 @@ void tokenize(Buf *buf, Tokenization *out) {
switch (c) {
case 'b':
t.radix = 2;
- t.state = TokenizeStateNumber;
+ t.state = TokenizeStateNumberNoUnderscore;
break;
case 'o':
t.radix = 8;
- t.exp_add_amt = 3;
- t.state = TokenizeStateNumber;
+ t.state = TokenizeStateNumberNoUnderscore;
break;
case 'x':
t.radix = 16;
- t.exp_add_amt = 4;
- t.state = TokenizeStateNumber;
+ t.state = TokenizeStateNumberNoUnderscore;
break;
default:
// reinterpret as normal number
@@ -1208,9 +1201,27 @@ void tokenize(Buf *buf, Tokenization *out) {
continue;
}
break;
+ case TokenizeStateNumberNoUnderscore:
+ if (c == '_') {
+ invalid_char_error(&t, c);
+ break;
+ } else if (get_digit_value(c) < t.radix) {
+ t.is_trailing_underscore = false;
+ t.state = TokenizeStateNumber;
+ }
+ // fall through
case TokenizeStateNumber:
{
+ if (c == '_') {
+ t.is_trailing_underscore = true;
+ t.state = TokenizeStateNumberNoUnderscore;
+ break;
+ }
if (c == '.') {
+ if (t.is_trailing_underscore) {
+ invalid_char_error(&t, c);
+ break;
+ }
if (t.radix != 16 && t.radix != 10) {
invalid_char_error(&t, c);
}
@@ -1222,13 +1233,18 @@ void tokenize(Buf *buf, Tokenization *out) {
invalid_char_error(&t, c);
}
t.state = TokenizeStateFloatExponentUnsigned;
+ t.radix = 10; // exponent is always base 10
assert(t.cur_tok->id == TokenIdIntLiteral);
- bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint);
set_token_id(&t, t.cur_tok, TokenIdFloatLiteral);
break;
}
uint32_t digit_value = get_digit_value(c);
if (digit_value >= t.radix) {
+ if (t.is_trailing_underscore) {
+ invalid_char_error(&t, c);
+ break;
+ }
+
if (is_symbol_char(c)) {
invalid_char_error(&t, c);
}
@@ -1259,20 +1275,37 @@ void tokenize(Buf *buf, Tokenization *out) {
continue;
}
t.pos -= 1;
- t.state = TokenizeStateFloatFraction;
+ t.state = TokenizeStateFloatFractionNoUnderscore;
assert(t.cur_tok->id == TokenIdIntLiteral);
- bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint);
set_token_id(&t, t.cur_tok, TokenIdFloatLiteral);
continue;
}
+ case TokenizeStateFloatFractionNoUnderscore:
+ // Entered right after '.' or after a '_' inside the fraction: the next
+ // character must be a digit of the current radix.
+ if (c == '_') {
+ invalid_char_error(&t, c);
+ // Stop here, as TokenizeStateNumberNoUnderscore does. Without the break
+ // the TokenizeStateFloatFraction arm below would process the same '_'
+ // and assign t.state again after the error was reported.
+ break;
+ } else if (get_digit_value(c) < t.radix) {
+ t.is_trailing_underscore = false;
+ t.state = TokenizeStateFloatFraction;
+ }
+ // fall through
case TokenizeStateFloatFraction:
{
+ if (c == '_') {
+ t.is_trailing_underscore = true;
+ t.state = TokenizeStateFloatFractionNoUnderscore;
+ break;
+ }
if (is_exponent_signifier(c, t.radix)) {
t.state = TokenizeStateFloatExponentUnsigned;
+ t.radix = 10; // exponent is always base 10
break;
}
uint32_t digit_value = get_digit_value(c);
if (digit_value >= t.radix) {
+ if (t.is_trailing_underscore) {
+ invalid_char_error(&t, c);
+ break;
+ }
if (is_symbol_char(c)) {
invalid_char_error(&t, c);
}
@@ -1282,46 +1315,47 @@ void tokenize(Buf *buf, Tokenization *out) {
t.state = TokenizeStateStart;
continue;
}
- t.exponent_in_bin_or_dec -= t.exp_add_amt;
- if (t.radix == 10) {
- // For now we use strtod to parse decimal floats, so we just have to get to the
- // end of the token.
- break;
- }
- BigInt digit_value_bi;
- bigint_init_unsigned(&digit_value_bi, digit_value);
- BigInt radix_bi;
- bigint_init_unsigned(&radix_bi, t.radix);
-
- BigInt multiplied;
- bigint_mul(&multiplied, &t.significand, &radix_bi);
-
- bigint_add(&t.significand, &multiplied, &digit_value_bi);
- break;
+ // we use parse_f128 to generate the float literal, so just
+ // need to get to the end of the token
}
+ break;
case TokenizeStateFloatExponentUnsigned:
switch (c) {
case '+':
- t.is_exp_negative = false;
- t.state = TokenizeStateFloatExponentNumber;
+ t.state = TokenizeStateFloatExponentNumberNoUnderscore;
break;
case '-':
- t.is_exp_negative = true;
- t.state = TokenizeStateFloatExponentNumber;
+ t.state = TokenizeStateFloatExponentNumberNoUnderscore;
break;
default:
// reinterpret as normal exponent number
t.pos -= 1;
- t.is_exp_negative = false;
- t.state = TokenizeStateFloatExponentNumber;
+ t.state = TokenizeStateFloatExponentNumberNoUnderscore;
continue;
}
break;
+ case TokenizeStateFloatExponentNumberNoUnderscore:
+ // Entered right after the exponent marker/sign or after a '_' inside the
+ // exponent: the next character must be a digit of the current radix.
+ if (c == '_') {
+ invalid_char_error(&t, c);
+ // Stop here, as TokenizeStateNumberNoUnderscore does. Without the break
+ // the TokenizeStateFloatExponentNumber arm below would process the same
+ // '_' and assign t.state again after the error was reported.
+ break;
+ } else if (get_digit_value(c) < t.radix) {
+ t.is_trailing_underscore = false;
+ t.state = TokenizeStateFloatExponentNumber;
+ }
+ // fall through
case TokenizeStateFloatExponentNumber:
{
+ if (c == '_') {
+ t.is_trailing_underscore = true;
+ t.state = TokenizeStateFloatExponentNumberNoUnderscore;
+ break;
+ }
uint32_t digit_value = get_digit_value(c);
if (digit_value >= t.radix) {
+ if (t.is_trailing_underscore) {
+ invalid_char_error(&t, c);
+ break;
+ }
if (is_symbol_char(c)) {
invalid_char_error(&t, c);
}
@@ -1331,21 +1365,9 @@ void tokenize(Buf *buf, Tokenization *out) {
t.state = TokenizeStateStart;
continue;
}
- if (t.radix == 10) {
- // For now we use strtod to parse decimal floats, so we just have to get to the
- // end of the token.
- break;
- }
- BigInt digit_value_bi;
- bigint_init_unsigned(&digit_value_bi, digit_value);
-
- BigInt radix_bi;
- bigint_init_unsigned(&radix_bi, 10);
-
- BigInt multiplied;
- bigint_mul(&multiplied, &t.specified_exponent, &radix_bi);
- bigint_add(&t.specified_exponent, &multiplied, &digit_value_bi);
+ // we use parse_f128 to generate the float literal, so just
+ // need to get to the end of the token
}
break;
case TokenizeStateSawDash:
@@ -1399,6 +1421,9 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateStart:
case TokenizeStateError:
break;
+ case TokenizeStateNumberNoUnderscore:
+ case TokenizeStateFloatFractionNoUnderscore:
+ case TokenizeStateFloatExponentNumberNoUnderscore:
case TokenizeStateNumberDot:
tokenize_error(&t, "unterminated number literal");
break;
test/stage1/behavior/math.zig
@@ -411,6 +411,34 @@ test "quad hex float literal parsing accurate" {
comptime S.doTheTest();
}
+test "underscore separator parsing" {
+ expect(0_0_0_0 == 0);
+ expect(1_234_567 == 1234567);
+ expect(001_234_567 == 1234567);
+ expect(0_0_1_2_3_4_5_6_7 == 1234567);
+
+ expect(0b0_0_0_0 == 0);
+ expect(0b1010_1010 == 0b10101010);
+ expect(0b0000_1010_1010 == 0b10101010);
+ expect(0b1_0_1_0_1_0_1_0 == 0b10101010);
+
+ expect(0o0_0_0_0 == 0);
+ expect(0o1010_1010 == 0o10101010);
+ expect(0o0000_1010_1010 == 0o10101010);
+ expect(0o1_0_1_0_1_0_1_0 == 0o10101010);
+
+ expect(0x0_0_0_0 == 0);
+ expect(0x1010_1010 == 0x10101010);
+ expect(0x0000_1010_1010 == 0x10101010);
+ expect(0x1_0_1_0_1_0_1_0 == 0x10101010);
+
+ expect(123_456.789_000e1_0 == 123456.789000e10);
+ expect(0_1_2_3_4_5_6.7_8_9_0_0_0e0_0_1_0 == 123456.789000e10);
+
+ expect(0x1234_5678.9ABC_DEF0p-1_0 == 0x12345678.9ABCDEF0p-10);
+ expect(0x1_2_3_4_5_6_7_8.9_A_B_C_D_E_F_0p-0_0_0_1_0 == 0x12345678.9ABCDEF0p-10);
+}
+
test "hex float literal within range" {
const a = 0x1.0p16383;
const b = 0x0.1p16387;
test/compile_errors.zig
@@ -389,6 +389,102 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
"tmp.zig:5:29: error: invalid token: '.'",
});
+ cases.add("invalid underscore placement in float literal - 1",
+ \\fn main() void {
+ \\ var bad: f128 = 0._0;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:23: error: invalid character: '_'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 2",
+ \\fn main() void {
+ \\ var bad: f128 = 0_.0;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:23: error: invalid character: '.'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 3",
+ \\fn main() void {
+ \\ var bad: f128 = 0.0_;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:25: error: invalid character: ';'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 4",
+ \\fn main() void {
+ \\ var bad: f128 = 1.0e_1;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:25: error: invalid character: '_'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 5",
+ \\fn main() void {
+ \\ var bad: f128 = 1.0e+_1;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:26: error: invalid character: '_'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 6",
+ \\fn main() void {
+ \\ var bad: f128 = 1.0e-_1;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:26: error: invalid character: '_'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 7",
+ \\fn main() void {
+ \\ var bad: f128 = 1.0e-1_;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:28: error: invalid character: ';'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 9",
+ \\fn main() void {
+ \\ var bad: f128 = 1__0.0e-1;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:23: error: invalid character: '_'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 10",
+ \\fn main() void {
+ \\ var bad: f128 = 1.0__0e-1;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:25: error: invalid character: '_'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 11",
+ \\fn main() void {
+ \\ var bad: f128 = 1.0e-1__0;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:28: error: invalid character: '_'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 12",
+ \\fn main() void {
+ \\ var bad: f128 = 0_x0.0;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:23: error: invalid character: 'x'",
+ });
+
+ cases.add("invalid underscore placement in float literal - 13",
+ \\fn main() void {
+ \\ var bad: f128 = 0x_0.0;
+ \\})
+ , &[_][]const u8{
+ "tmp.zig:2:23: error: invalid character: '_'",
+ });
+
cases.add("var args without c calling conv",
\\fn foo(args: ...) void {}
\\comptime {