Commit 5c6cd5e2c9

LemonBoy <thatlemon@gmail.com>
2020-09-27 17:17:27
stage{1,2}: Fix parsing of range literals
stage1 was unable to parse ranges whose starting point was written in binary/octal, because the first dot in '...' was incorrectly interpreted as a decimal point. stage2 forgot to reset the literal type to IntegerLiteral when it discovered the dot was not a decimal point. I only stumbled across this bug because zig fmt keeps formatting the ranges without any space around the '...'.
1 parent e60939b
Changed files (2)
lib/std/zig/tokenizer.zig
@@ -1195,6 +1195,7 @@ pub const Tokenizer = struct {
                 },
                 .num_dot_hex => switch (c) {
                     '.' => {
+                        result.id = .IntegerLiteral;
                         self.index -= 1;
                         state = .start;
                         break;
@@ -1758,6 +1759,14 @@ test "correctly parse pointer assignment" {
     });
 }
 
+test "tokenizer - range literals" {
+    testTokenize("0...9", &[_]Token.Id{ .IntegerLiteral, .Ellipsis3, .IntegerLiteral });
+    testTokenize("'0'...'9'", &[_]Token.Id{ .CharLiteral, .Ellipsis3, .CharLiteral });
+    testTokenize("0x00...0x09", &[_]Token.Id{ .IntegerLiteral, .Ellipsis3, .IntegerLiteral });
+    testTokenize("0b00...0b11", &[_]Token.Id{ .IntegerLiteral, .Ellipsis3, .IntegerLiteral });
+    testTokenize("0o00...0o11", &[_]Token.Id{ .IntegerLiteral, .Ellipsis3, .IntegerLiteral });
+}
+
 test "tokenizer - number literals decimal" {
     testTokenize("0", &[_]Token.Id{.IntegerLiteral});
     testTokenize("1", &[_]Token.Id{.IntegerLiteral});
src/tokenizer.cpp
@@ -1225,9 +1225,6 @@ void tokenize(Buf *buf, Tokenization *out) {
                             invalid_char_error(&t, c);
                             break;
                         }
-                        if (t.radix != 16 && t.radix != 10) {
-                            invalid_char_error(&t, c);
-                        }
                         t.state = TokenizeStateNumberDot;
                         break;
                     }
@@ -1281,6 +1278,9 @@ void tokenize(Buf *buf, Tokenization *out) {
                         t.state = TokenizeStateStart;
                         continue;
                     }
+                    if (t.radix != 16 && t.radix != 10) {
+                        invalid_char_error(&t, c);
+                    }
                     t.pos -= 1;
                     t.state = TokenizeStateFloatFractionNoUnderscore;
                     assert(t.cur_tok->id == TokenIdIntLiteral);