Commit 24c432608f

Andrew Kelley <andrew@ziglang.org>
2021-07-01 09:14:58
stage2: improve compile errors from tokenizer
In order to not regress the quality of compile errors, some improvements had to be made. * std.zig.parseCharLiteral is improved to return more detailed parse failure information. * tokenizer is improved to handle null bytes in the middle of strings, character literals, and line comments. * validation of the number of unicode escape digits in string literals is moved to std.zig.parseStringLiteral rather than being handled in the tokenizer. * when a tokenizer error occurs, if the reported token has the 'invalid' tag, an error note is added to point to the invalid byte location. Further improvements would be: - Mention the expected set of allowed bytes at this location. - Display the invalid byte (if printable, print it, otherwise escape-print it).
1 parent 3f680ab
lib/std/zig/tokenizer.zig
@@ -701,12 +701,19 @@ pub const Tokenizer = struct {
                         self.index += 1;
                         break;
                     },
-                    0, '\n', '\r' => break, // Look for this error later.
+                    0 => {
+                        if (self.index == self.buffer.len) {
+                            break;
+                        } else {
+                            self.checkLiteralCharacter();
+                        }
+                    },
+                    '\n', '\r' => break, // Look for this error later.
                     else => self.checkLiteralCharacter(),
                 },
 
                 .string_literal_backslash => switch (c) {
-                    0, '\n', '\r' => break, // Look for this error later.
+                    '\n', '\r' => break, // Look for this error later.
                     else => {
                         state = .string_literal;
                     },
@@ -774,7 +781,6 @@ pub const Tokenizer = struct {
                 .char_literal_unicode_escape_saw_u => switch (c) {
                     '{' => {
                         state = .char_literal_unicode_escape;
-                        seen_escape_digits = 0;
                     },
                     else => {
                         result.tag = .invalid;
@@ -783,16 +789,9 @@ pub const Tokenizer = struct {
                 },
 
                 .char_literal_unicode_escape => switch (c) {
-                    '0'...'9', 'a'...'f', 'A'...'F' => {
-                        seen_escape_digits += 1;
-                    },
+                    '0'...'9', 'a'...'f', 'A'...'F' => {},
                     '}' => {
-                        if (seen_escape_digits == 0) {
-                            result.tag = .invalid;
-                            state = .char_literal_unicode_invalid;
-                        } else {
-                            state = .char_literal_end;
-                        }
+                        state = .char_literal_end; // too many/few digits handled later
                     },
                     else => {
                         result.tag = .invalid;
@@ -1026,7 +1025,13 @@ pub const Tokenizer = struct {
                     },
                 },
                 .line_comment_start => switch (c) {
-                    0 => break,
+                    0 => {
+                        if (self.index != self.buffer.len) {
+                            result.tag = .invalid;
+                            self.index += 1;
+                        }
+                        break;
+                    },
                     '/' => {
                         state = .doc_comment_start;
                     },
@@ -1441,7 +1446,7 @@ test "tokenizer - code point literal with unicode escapes" {
     , &.{ .invalid, .invalid });
     try testTokenize(
         \\'\u{}'
-    , &.{ .invalid, .invalid });
+    , &.{.char_literal});
     try testTokenize(
         \\'\u{s}'
     , &.{ .invalid, .invalid });
@@ -1924,15 +1929,17 @@ test "tokenizer - invalid builtin identifiers" {
     try testTokenize("@0()", &.{ .invalid, .integer_literal, .l_paren, .r_paren });
 }
 
-fn testTokenize(source: []const u8, expected_tokens: []const Token.Tag) !void {
+fn testTokenize(source: [:0]const u8, expected_tokens: []const Token.Tag) !void {
     var tokenizer = Tokenizer.init(source);
     for (expected_tokens) |expected_token_id| {
         const token = tokenizer.next();
         if (token.tag != expected_token_id) {
-            std.debug.panic("expected {s}, found {s}\n", .{ @tagName(expected_token_id), @tagName(token.tag) });
+            std.debug.panic("expected {s}, found {s}\n", .{
+                @tagName(expected_token_id), @tagName(token.tag),
+            });
         }
     }
     const last_token = tokenizer.next();
-    try std.testing.expect(last_token.tag == .eof);
-    try std.testing.expect(last_token.loc.start == source.len);
+    try std.testing.expectEqual(Token.Tag.eof, last_token.tag);
+    try std.testing.expectEqual(source.len, last_token.loc.start);
 }
lib/std/zig.zig
@@ -6,6 +6,7 @@
 const std = @import("std.zig");
 const tokenizer = @import("zig/tokenizer.zig");
 const fmt = @import("zig/fmt.zig");
+const assert = std.debug.assert;
 
 pub const Token = tokenizer.Token;
 pub const Tokenizer = tokenizer.Tokenizer;
@@ -183,29 +184,48 @@ pub fn binNameAlloc(allocator: *std.mem.Allocator, options: BinNameOptions) erro
     }
 }
 
+pub const ParsedCharLiteral = union(enum) {
+    success: u32,
+    /// The character after backslash is not recognized.
+    invalid_escape_character: usize,
+    /// Expected hex digit at this index.
+    expected_hex_digit: usize,
+    /// Unicode escape sequence had no digits with rbrace at this index.
+    empty_unicode_escape_sequence: usize,
+    /// Expected hex digit or '}' at this index.
+    expected_hex_digit_or_rbrace: usize,
+    /// The unicode point is outside the range of Unicode codepoints.
+    unicode_escape_overflow: usize,
+    /// Expected '{' at this index.
+    expected_lbrace: usize,
+    /// Expected the terminating single quote at this index.
+    expected_end: usize,
+    /// The character at this index cannot be represented without an escape sequence.
+    invalid_character: usize,
+};
+
 /// Only validates escape sequence characters.
 /// Slice must be valid utf8 starting and ending with "'" and exactly one codepoint in between.
-pub fn parseCharLiteral(
-    slice: []const u8,
-    bad_index: *usize, // populated if error.InvalidCharacter is returned
-) error{InvalidCharacter}!u32 {
-    std.debug.assert(slice.len >= 3 and slice[0] == '\'' and slice[slice.len - 1] == '\'');
+pub fn parseCharLiteral(slice: []const u8) ParsedCharLiteral {
+    assert(slice.len >= 3 and slice[0] == '\'' and slice[slice.len - 1] == '\'');
 
-    if (slice[1] == '\\') {
-        switch (slice[2]) {
-            'n' => return '\n',
-            'r' => return '\r',
-            '\\' => return '\\',
-            't' => return '\t',
-            '\'' => return '\'',
-            '"' => return '"',
+    switch (slice[1]) {
+        0 => return .{ .invalid_character = 1 },
+        '\\' => switch (slice[2]) {
+            'n' => return .{ .success = '\n' },
+            'r' => return .{ .success = '\r' },
+            '\\' => return .{ .success = '\\' },
+            't' => return .{ .success = '\t' },
+            '\'' => return .{ .success = '\'' },
+            '"' => return .{ .success = '"' },
             'x' => {
-                if (slice.len != 6) {
-                    bad_index.* = slice.len - 2;
-                    return error.InvalidCharacter;
+                if (slice.len < 4) {
+                    return .{ .expected_hex_digit = 3 };
                 }
                 var value: u32 = 0;
-                for (slice[3..5]) |c, i| {
+                var i: usize = 3;
+                while (i < 5) : (i += 1) {
+                    const c = slice[i];
                     switch (c) {
                         '0'...'9' => {
                             value *= 16;
@@ -220,20 +240,28 @@ pub fn parseCharLiteral(
                             value += c - 'A' + 10;
                         },
                         else => {
-                            bad_index.* = 3 + i;
-                            return error.InvalidCharacter;
+                            return .{ .expected_hex_digit = i };
                         },
                     }
                 }
-                return value;
+                if (slice[i] != '\'') {
+                    return .{ .expected_end = i };
+                }
+                return .{ .success = value };
             },
             'u' => {
-                if (slice.len < "'\\u{0}'".len or slice[3] != '{' or slice[slice.len - 2] != '}') {
-                    bad_index.* = 2;
-                    return error.InvalidCharacter;
+                var i: usize = 3;
+                if (slice[i] != '{') {
+                    return .{ .expected_lbrace = i };
                 }
+                i += 1;
+                if (slice[i] == '}') {
+                    return .{ .empty_unicode_escape_sequence = i };
+                }
+
                 var value: u32 = 0;
-                for (slice[4 .. slice.len - 2]) |c, i| {
+                while (i < slice.len) : (i += 1) {
+                    const c = slice[i];
                     switch (c) {
                         '0'...'9' => {
                             value *= 16;
@@ -247,49 +275,112 @@ pub fn parseCharLiteral(
                             value *= 16;
                             value += c - 'A' + 10;
                         },
-                        else => {
-                            bad_index.* = 4 + i;
-                            return error.InvalidCharacter;
+                        '}' => {
+                            i += 1;
+                            break;
                         },
+                        else => return .{ .expected_hex_digit_or_rbrace = i },
                     }
                     if (value > 0x10ffff) {
-                        bad_index.* = 4 + i;
-                        return error.InvalidCharacter;
+                        return .{ .unicode_escape_overflow = i };
                     }
                 }
-                return value;
-            },
-            else => {
-                bad_index.* = 2;
-                return error.InvalidCharacter;
+                if (slice[i] != '\'') {
+                    return .{ .expected_end = i };
+                }
+                return .{ .success = value };
             },
-        }
+            else => return .{ .invalid_escape_character = 2 },
+        },
+        else => {
+            const codepoint = std.unicode.utf8Decode(slice[1 .. slice.len - 1]) catch unreachable;
+            return .{ .success = codepoint };
+        },
     }
-    return std.unicode.utf8Decode(slice[1 .. slice.len - 1]) catch unreachable;
 }
 
 test "parseCharLiteral" {
-    var bad_index: usize = undefined;
-    try std.testing.expectEqual(try parseCharLiteral("'a'", &bad_index), 'a');
-    try std.testing.expectEqual(try parseCharLiteral("'ä'", &bad_index), 'ä');
-    try std.testing.expectEqual(try parseCharLiteral("'\\x00'", &bad_index), 0);
-    try std.testing.expectEqual(try parseCharLiteral("'\\x4f'", &bad_index), 0x4f);
-    try std.testing.expectEqual(try parseCharLiteral("'\\x4F'", &bad_index), 0x4f);
-    try std.testing.expectEqual(try parseCharLiteral("'ぁ'", &bad_index), 0x3041);
-    try std.testing.expectEqual(try parseCharLiteral("'\\u{0}'", &bad_index), 0);
-    try std.testing.expectEqual(try parseCharLiteral("'\\u{3041}'", &bad_index), 0x3041);
-    try std.testing.expectEqual(try parseCharLiteral("'\\u{7f}'", &bad_index), 0x7f);
-    try std.testing.expectEqual(try parseCharLiteral("'\\u{7FFF}'", &bad_index), 0x7FFF);
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .success = 'a' },
+        parseCharLiteral("'a'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .success = 'ä' },
+        parseCharLiteral("'ä'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .success = 0 },
+        parseCharLiteral("'\\x00'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .success = 0x4f },
+        parseCharLiteral("'\\x4f'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .success = 0x4f },
+        parseCharLiteral("'\\x4F'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .success = 0x3041 },
+        parseCharLiteral("'ぁ'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .success = 0 },
+        parseCharLiteral("'\\u{0}'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .success = 0x3041 },
+        parseCharLiteral("'\\u{3041}'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .success = 0x7f },
+        parseCharLiteral("'\\u{7f}'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .success = 0x7fff },
+        parseCharLiteral("'\\u{7FFF}'"),
+    );
 
-    try std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\x0'", &bad_index));
-    try std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\x000'", &bad_index));
-    try std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\y'", &bad_index));
-    try std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\u'", &bad_index));
-    try std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\uFFFF'", &bad_index));
-    try std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\u{}'", &bad_index));
-    try std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\u{FFFFFF}'", &bad_index));
-    try std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\u{FFFF'", &bad_index));
-    try std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\u{FFFF}x'", &bad_index));
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .expected_hex_digit = 4 },
+        parseCharLiteral("'\\x0'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .expected_end = 5 },
+        parseCharLiteral("'\\x000'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .invalid_escape_character = 2 },
+        parseCharLiteral("'\\y'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .expected_lbrace = 3 },
+        parseCharLiteral("'\\u'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .expected_lbrace = 3 },
+        parseCharLiteral("'\\uFFFF'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .empty_unicode_escape_sequence = 4 },
+        parseCharLiteral("'\\u{}'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .unicode_escape_overflow = 9 },
+        parseCharLiteral("'\\u{FFFFFF}'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .expected_hex_digit_or_rbrace = 8 },
+        parseCharLiteral("'\\u{FFFF'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .expected_end = 9 },
+        parseCharLiteral("'\\u{FFFF}x'"),
+    );
+    try std.testing.expectEqual(
+        ParsedCharLiteral{ .invalid_character = 1 },
+        parseCharLiteral("'\x00'"),
+    );
 }
 
 test {
src/AstGen.zig
@@ -6380,20 +6380,76 @@ fn charLiteral(gz: *GenZir, rl: ResultLoc, node: ast.Node.Index) !Zir.Inst.Ref {
     const main_token = main_tokens[node];
     const slice = tree.tokenSlice(main_token);
 
-    var bad_index: usize = undefined;
-    const value = std.zig.parseCharLiteral(slice, &bad_index) catch |err| switch (err) {
-        error.InvalidCharacter => {
-            const bad_byte = slice[bad_index];
+    switch (std.zig.parseCharLiteral(slice)) {
+        .success => |codepoint| {
+            const result = try gz.addInt(codepoint);
+            return rvalue(gz, rl, result, node);
+        },
+        .invalid_escape_character => |bad_index| {
             return astgen.failOff(
                 main_token,
                 @intCast(u32, bad_index),
-                "invalid character: '{c}'\n",
-                .{bad_byte},
+                "invalid escape character: '{c}'",
+                .{slice[bad_index]},
             );
         },
-    };
-    const result = try gz.addInt(value);
-    return rvalue(gz, rl, result, node);
+        .expected_hex_digit => |bad_index| {
+            return astgen.failOff(
+                main_token,
+                @intCast(u32, bad_index),
+                "expected hex digit, found '{c}'",
+                .{slice[bad_index]},
+            );
+        },
+        .empty_unicode_escape_sequence => |bad_index| {
+            return astgen.failOff(
+                main_token,
+                @intCast(u32, bad_index),
+                "empty unicode escape sequence",
+                .{},
+            );
+        },
+        .expected_hex_digit_or_rbrace => |bad_index| {
+            return astgen.failOff(
+                main_token,
+                @intCast(u32, bad_index),
+                "expected hex digit or '}}', found '{c}'",
+                .{slice[bad_index]},
+            );
+        },
+        .unicode_escape_overflow => |bad_index| {
+            return astgen.failOff(
+                main_token,
+                @intCast(u32, bad_index),
+                "unicode escape too large to be a valid codepoint",
+                .{},
+            );
+        },
+        .expected_lbrace => |bad_index| {
+            return astgen.failOff(
+                main_token,
+                @intCast(u32, bad_index),
+                "expected '{{', found '{c}",
+                .{slice[bad_index]},
+            );
+        },
+        .expected_end => |bad_index| {
+            return astgen.failOff(
+                main_token,
+                @intCast(u32, bad_index),
+                "expected ending single quote ('), found '{c}",
+                .{slice[bad_index]},
+            );
+        },
+        .invalid_character => |bad_index| {
+            return astgen.failOff(
+                main_token,
+                @intCast(u32, bad_index),
+                "invalid byte in character literal: '{c}'",
+                .{slice[bad_index]},
+            );
+        },
+    }
 }
 
 fn integerLiteral(gz: *GenZir, rl: ResultLoc, node: ast.Node.Index) InnerError!Zir.Inst.Ref {
src/main.zig
@@ -3380,6 +3380,7 @@ fn printErrMsgToStdErr(
     color: Color,
 ) !void {
     const lok_token = parse_error.token;
+    const token_tags = tree.tokens.items(.tag);
     const start_loc = tree.tokenLocation(0, lok_token);
     const source_line = tree.source[start_loc.line_start..start_loc.line_end];
 
@@ -3389,6 +3390,24 @@ fn printErrMsgToStdErr(
     try tree.renderError(parse_error, writer);
     const text = text_buf.items;
 
+    var notes_buffer: [1]Compilation.AllErrors.Message = undefined;
+    var notes_len: usize = 0;
+
+    if (token_tags[parse_error.token] == .invalid) {
+        const bad_off = @intCast(u32, tree.tokenSlice(parse_error.token).len);
+        notes_buffer[notes_len] = .{
+            .src = .{
+                .src_path = path,
+                .msg = "invalid byte here",
+                .byte_offset = @intCast(u32, start_loc.line_start) + bad_off,
+                .line = @intCast(u32, start_loc.line),
+                .column = @intCast(u32, start_loc.column) + bad_off,
+                .source_line = source_line,
+            },
+        };
+        notes_len += 1;
+    }
+
     const message: Compilation.AllErrors.Message = .{
         .src = .{
             .src_path = path,
@@ -3397,6 +3416,7 @@ fn printErrMsgToStdErr(
             .line = @intCast(u32, start_loc.line),
             .column = @intCast(u32, start_loc.column),
             .source_line = source_line,
+            .notes = notes_buffer[0..notes_len],
         },
     };
 
src/Module.zig
@@ -2466,6 +2466,7 @@ pub fn astGenFile(mod: *Module, file: *Scope.File) !void {
         defer msg.deinit();
 
         const token_starts = file.tree.tokens.items(.start);
+        const token_tags = file.tree.tokens.items(.tag);
 
         try file.tree.renderError(parse_err, msg.writer());
         const err_msg = try gpa.create(ErrorMsg);
@@ -2477,6 +2478,14 @@ pub fn astGenFile(mod: *Module, file: *Scope.File) !void {
             },
             .msg = msg.toOwnedSlice(),
         };
+        if (token_tags[parse_err.token] == .invalid) {
+            const bad_off = @intCast(u32, file.tree.tokenSlice(parse_err.token).len);
+            try mod.errNoteNonLazy(.{
+                .file_scope = file,
+                .parent_decl_node = 0,
+                .lazy = .{ .byte_abs = token_starts[parse_err.token] + bad_off },
+            }, err_msg, "invalid byte here", .{});
+        }
 
         {
             const lock = comp.mutex.acquire();
test/compile_errors.zig
@@ -1506,7 +1506,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:28: error: invalid character: 'a'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:28: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid exponent in float literal - 2",
@@ -1515,7 +1516,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:29: error: invalid character: 'F'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:29: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 1",
@@ -1524,7 +1526,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:23: error: invalid character: '_'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:23: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 2",
@@ -1533,7 +1536,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:23: error: invalid character: '.'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:23: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 3",
@@ -1542,7 +1546,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:25: error: invalid character: ';'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:25: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 4",
@@ -1551,7 +1556,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:25: error: invalid character: '_'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:25: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 5",
@@ -1560,7 +1566,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:26: error: invalid character: '_'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:26: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 6",
@@ -1569,7 +1576,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:26: error: invalid character: '_'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:26: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 7",
@@ -1578,7 +1586,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:28: error: invalid character: ';'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:28: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 9",
@@ -1587,7 +1596,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:23: error: invalid character: '_'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:23: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 10",
@@ -1596,7 +1606,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:25: error: invalid character: '_'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:25: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 11",
@@ -1605,7 +1616,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:28: error: invalid character: '_'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:28: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 12",
@@ -1614,7 +1626,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:23: error: invalid character: 'x'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:23: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 13",
@@ -1623,7 +1636,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:23: error: invalid character: '_'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:23: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 14",
@@ -1632,7 +1646,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:27: error: invalid character: 'p'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:27: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in int literal - 1",
@@ -1641,7 +1656,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:26: error: invalid character: ';'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:26: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in int literal - 2",
@@ -1650,7 +1666,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:28: error: invalid character: ';'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:28: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in int literal - 3",
@@ -1659,7 +1676,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:28: error: invalid character: ';'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:28: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid underscore placement in int literal - 4",
@@ -1668,7 +1686,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    _ = bad;
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:28: error: invalid character: ';'",
+        "tmp.zig:2:21: error: expected expression, found 'invalid'",
+        "tmp.zig:2:28: note: invalid byte here",
     });
 
     ctx.objErrStage1("comptime struct field, no init value",
@@ -7544,7 +7563,8 @@ pub fn addCases(ctx: *TestContext) !void {
         \\    const a = '\U1234';
         \\}
     , &[_][]const u8{
-        "tmp.zig:2:17: error: invalid character: 'U'",
+        "tmp.zig:2:15: error: expected expression, found 'invalid'",
+        "tmp.zig:2:18: note: invalid byte here",
     });
 
     ctx.objErrStage1("invalid empty unicode escape",