Commit 54e887ed9e

Andrew Kelley <superjoe30@gmail.com>
2018-05-24 07:15:50
std.zig.tokenizer: fix tokenization of hex floats
1 parent b132a17
Changed files (2)
std/zig/parser_test.zig
@@ -1,3 +1,11 @@
+test "zig fmt: float literal with exponent" {
+    try testCanonical(
+        \\pub const f64_true_min = 4.94065645841246544177e-324;
+        \\const threshold = 0x1.a827999fcef32p+1022;
+        \\
+    );
+}
+
 test "zig fmt: if-else end of comptime" {
     try testCanonical(
         \\comptime {
@@ -238,13 +246,6 @@ test "zig fmt: switch with empty body" {
     );
 }
 
-test "zig fmt: float literal with exponent" {
-    try testCanonical(
-        \\pub const f64_true_min = 4.94065645841246544177e-324;
-        \\
-    );
-}
-
 test "zig fmt: line comments in struct initializer" {
     try testCanonical(
         \\fn foo() void {
std/zig/tokenizer.zig
@@ -6,12 +6,12 @@ pub const Token = struct {
     start: usize,
     end: usize,
 
-    const Keyword = struct {
+    pub const Keyword = struct {
         bytes: []const u8,
         id: Id,
     };
 
-    const keywords = []Keyword {
+    pub const keywords = []Keyword {
         Keyword{.bytes="align", .id = Id.Keyword_align},
         Keyword{.bytes="and", .id = Id.Keyword_and},
         Keyword{.bytes="asm", .id = Id.Keyword_asm},
@@ -62,6 +62,7 @@ pub const Token = struct {
         Keyword{.bytes="while", .id = Id.Keyword_while},
     };
 
+    // TODO perfect hash at comptime
     fn getKeyword(bytes: []const u8) ?Id {
         for (keywords) |kw| {
             if (mem.eql(u8, kw.bytes, bytes)) {
@@ -236,10 +237,15 @@ pub const Tokenizer = struct {
         Zero,
         IntegerLiteral,
         IntegerLiteralWithRadix,
+        IntegerLiteralWithRadixHex,
         NumberDot,
+        NumberDotHex,
         FloatFraction,
+        FloatFractionHex,
         FloatExponentUnsigned,
+        FloatExponentUnsignedHex,
         FloatExponentNumber,
+        FloatExponentNumberHex,
         Ampersand,
         Caret,
         Percent,
@@ -839,9 +845,12 @@ pub const Tokenizer = struct {
                     else => self.checkLiteralCharacter(),
                 },
                 State.Zero => switch (c) {
-                    'b', 'o', 'x' => {
+                    'b', 'o' => {
                         state = State.IntegerLiteralWithRadix;
                     },
+                    'x' => {
+                        state = State.IntegerLiteralWithRadixHex;
+                    },
                     else => {
                         // reinterpret as a normal number
                         self.index -= 1;
@@ -862,8 +871,15 @@ pub const Tokenizer = struct {
                     '.' => {
                         state = State.NumberDot;
                     },
+                    '0'...'9' => {},
+                    else => break,
+                },
+                State.IntegerLiteralWithRadixHex => switch (c) {
+                    '.' => {
+                        state = State.NumberDotHex;
+                    },
                     'p', 'P' => {
-                        state = State.FloatExponentUnsigned;
+                        state = State.FloatExponentUnsignedHex;
                     },
                     '0'...'9', 'a'...'f', 'A'...'F' => {},
                     else => break,
@@ -880,13 +896,32 @@ pub const Tokenizer = struct {
                         state = State.FloatFraction;
                     },
                 },
+                State.NumberDotHex => switch (c) {
+                    '.' => {
+                        self.index -= 1;
+                        state = State.Start;
+                        break;
+                    },
+                    else => {
+                        self.index -= 1;
+                        result.id = Token.Id.FloatLiteral;
+                        state = State.FloatFractionHex;
+                    },
+                },
                 State.FloatFraction => switch (c) {
-                    'p', 'P', 'e', 'E' => {
+                    'e', 'E' => {
                         state = State.FloatExponentUnsigned;
                     },
                     '0'...'9' => {},
                     else => break,
                 },
+                State.FloatFractionHex => switch (c) {
+                    'p', 'P' => {
+                        state = State.FloatExponentUnsignedHex;
+                    },
+                    '0'...'9', 'a'...'f', 'A'...'F' => {},
+                    else => break,
+                },
                 State.FloatExponentUnsigned => switch (c) {
                     '+', '-' => {
                         state = State.FloatExponentNumber;
@@ -897,7 +932,21 @@ pub const Tokenizer = struct {
                         state = State.FloatExponentNumber;
                     }
                 },
+                State.FloatExponentUnsignedHex => switch (c) {
+                    '+', '-' => {
+                        state = State.FloatExponentNumberHex;
+                    },
+                    else => {
+                        // reinterpret as a normal exponent number
+                        self.index -= 1;
+                        state = State.FloatExponentNumberHex;
+                    }
+                },
                 State.FloatExponentNumber => switch (c) {
+                    '0'...'9' => {},
+                    else => break,
+                },
+                State.FloatExponentNumberHex => switch (c) {
                     '0'...'9', 'a'...'f', 'A'...'F' => {},
                     else => break,
                 },
@@ -908,8 +957,11 @@ pub const Tokenizer = struct {
                 State.C,
                 State.IntegerLiteral,
                 State.IntegerLiteralWithRadix,
+                State.IntegerLiteralWithRadixHex,
                 State.FloatFraction,
+                State.FloatFractionHex,
                 State.FloatExponentNumber,
+                State.FloatExponentNumberHex,
                 State.StringLiteral, // find this error later
                 State.MultilineStringLiteralLine,
                 State.Builtin => {},
@@ -928,7 +980,9 @@ pub const Tokenizer = struct {
                 },
 
                 State.NumberDot,
+                State.NumberDotHex,
                 State.FloatExponentUnsigned,
+                State.FloatExponentUnsignedHex,
                 State.SawAtSign,
                 State.Backslash,
                 State.MultilineStringLiteralLineBackslash,
@@ -1073,7 +1127,7 @@ test "tokenizer" {
     });
 }
 
-test "tokenizer - float literal" {
+test "tokenizer - float literal e exponent" {
     testTokenize("a = 4.94065645841246544177e-324;\n", []Token.Id {
         Token.Id.Identifier,
         Token.Id.Equal,
@@ -1082,6 +1136,15 @@ test "tokenizer - float literal" {
     });
 }
 
+test "tokenizer - float literal p exponent" {
+    testTokenize("a = 0x1.a827999fcef32p+1022;\n", []Token.Id {
+        Token.Id.Identifier,
+        Token.Id.Equal,
+        Token.Id.FloatLiteral,
+        Token.Id.Semicolon,
+    });
+}
+
 test "tokenizer - chars" {
     testTokenize("'c'", []Token.Id {Token.Id.CharLiteral});
 }