Commit c5c23db627

Andrew Kelley <andrew@ziglang.org>
2021-07-02 21:33:05
tokenizer: clean up invalid token error
It now displays the byte with proper printability handling. This makes the relevant compile error test case no longer a regression in quality from stage1 to stage2.
1 parent 7a2e0d9
Changed files (7)
lib/std/zig/tokenizer.zig
@@ -551,8 +551,9 @@ pub const Tokenizer = struct {
                     },
                     else => {
                         result.tag = .invalid;
+                        result.loc.end = self.index;
                         self.index += 1;
-                        break;
+                        return result;
                     },
                 },
 
src/stage1/analyze.cpp
@@ -3915,7 +3915,7 @@ static void add_top_level_decl(CodeGen *g, ScopeDecls *decls_scope, Tld *tld) {
                 }
             }
             ErrorMsg *msg = add_node_error(g, tld->source_node, buf_sprintf("redefinition of '%s'", buf_ptr(tld->name)));
-            add_error_note(g, msg, other_tld->source_node, buf_sprintf("previous definition is here"));
+            add_error_note(g, msg, other_tld->source_node, buf_sprintf("previous definition here"));
             return;
         }
 
@@ -4176,7 +4176,7 @@ ZigVar *add_variable(CodeGen *g, AstNode *source_node, Scope *parent_scope, Buf
             if (existing_var->var_type == nullptr || !type_is_invalid(existing_var->var_type)) {
                 ErrorMsg *msg = add_node_error(g, source_node,
                         buf_sprintf("redeclaration of variable '%s'", buf_ptr(name)));
-                add_error_note(g, msg, existing_var->decl_node, buf_sprintf("previous declaration is here"));
+                add_error_note(g, msg, existing_var->decl_node, buf_sprintf("previous declaration here"));
             }
             variable_entry->var_type = g->builtin_types.entry_invalid;
         } else {
@@ -4205,7 +4205,7 @@ ZigVar *add_variable(CodeGen *g, AstNode *source_node, Scope *parent_scope, Buf
                         if (want_err_msg) {
                             ErrorMsg *msg = add_node_error(g, source_node,
                                     buf_sprintf("redefinition of '%s'", buf_ptr(name)));
-                            add_error_note(g, msg, tld->source_node, buf_sprintf("previous definition is here"));
+                            add_error_note(g, msg, tld->source_node, buf_sprintf("previous definition here"));
                         }
                         variable_entry->var_type = g->builtin_types.entry_invalid;
                     }
src/main.zig
@@ -233,7 +233,7 @@ pub fn mainArgs(gpa: *Allocator, arena: *Allocator, args: []const []const u8) !v
     } else if (mem.eql(u8, cmd, "build")) {
         return cmdBuild(gpa, arena, cmd_args);
     } else if (mem.eql(u8, cmd, "fmt")) {
-        return cmdFmt(gpa, cmd_args);
+        return cmdFmt(gpa, arena, cmd_args);
     } else if (mem.eql(u8, cmd, "libc")) {
         return cmdLibC(gpa, cmd_args);
     } else if (mem.eql(u8, cmd, "init-exe")) {
@@ -3039,12 +3039,13 @@ const Fmt = struct {
     check_ast: bool,
     color: Color,
     gpa: *Allocator,
+    arena: *Allocator,
     out_buffer: std.ArrayList(u8),
 
     const SeenMap = std.AutoHashMap(fs.File.INode, void);
 };
 
-pub fn cmdFmt(gpa: *Allocator, args: []const []const u8) !void {
+pub fn cmdFmt(gpa: *Allocator, arena: *Allocator, args: []const []const u8) !void {
     var color: Color = .auto;
     var stdin_flag: bool = false;
     var check_flag: bool = false;
@@ -3102,7 +3103,7 @@ pub fn cmdFmt(gpa: *Allocator, args: []const []const u8) !void {
         defer tree.deinit(gpa);
 
         for (tree.errors) |parse_error| {
-            try printErrMsgToStdErr(gpa, parse_error, tree, "<stdin>", color);
+            try printErrMsgToStdErr(gpa, arena, parse_error, tree, "<stdin>", color);
         }
         var has_ast_error = false;
         if (check_ast_flag) {
@@ -3170,6 +3171,7 @@ pub fn cmdFmt(gpa: *Allocator, args: []const []const u8) !void {
 
     var fmt = Fmt{
         .gpa = gpa,
+        .arena = arena,
         .seen = Fmt.SeenMap.init(gpa),
         .any_error = false,
         .check_ast = check_ast_flag,
@@ -3293,7 +3295,7 @@ fn fmtPathFile(
     defer tree.deinit(fmt.gpa);
 
     for (tree.errors) |parse_error| {
-        try printErrMsgToStdErr(fmt.gpa, parse_error, tree, file_path, fmt.color);
+        try printErrMsgToStdErr(fmt.gpa, fmt.arena, parse_error, tree, file_path, fmt.color);
     }
     if (tree.errors.len != 0) {
         fmt.any_error = true;
@@ -3374,6 +3376,7 @@ fn fmtPathFile(
 
 fn printErrMsgToStdErr(
     gpa: *mem.Allocator,
+    arena: *mem.Allocator,
     parse_error: ast.Error,
     tree: ast.Tree,
     path: []const u8,
@@ -3395,11 +3398,14 @@ fn printErrMsgToStdErr(
 
     if (token_tags[parse_error.token] == .invalid) {
         const bad_off = @intCast(u32, tree.tokenSlice(parse_error.token).len);
+        const byte_offset = @intCast(u32, start_loc.line_start) + bad_off;
         notes_buffer[notes_len] = .{
             .src = .{
                 .src_path = path,
-                .msg = "invalid byte here",
-                .byte_offset = @intCast(u32, start_loc.line_start) + bad_off,
+                .msg = try std.fmt.allocPrint(arena, "invalid byte: '{'}'", .{
+                    std.zig.fmtEscapes(tree.source[byte_offset..][0..1]),
+                }),
+                .byte_offset = byte_offset,
                 .line = @intCast(u32, start_loc.line),
                 .column = @intCast(u32, start_loc.column) + bad_off,
                 .source_line = source_line,
@@ -3943,7 +3949,7 @@ pub fn cmdAstCheck(
     defer file.tree.deinit(gpa);
 
     for (file.tree.errors) |parse_error| {
-        try printErrMsgToStdErr(gpa, parse_error, file.tree, file.sub_file_path, color);
+        try printErrMsgToStdErr(gpa, arena, parse_error, file.tree, file.sub_file_path, color);
     }
     if (file.tree.errors.len != 0) {
         process.exit(1);
@@ -4069,7 +4075,7 @@ pub fn cmdChangelist(
     defer file.tree.deinit(gpa);
 
     for (file.tree.errors) |parse_error| {
-        try printErrMsgToStdErr(gpa, parse_error, file.tree, old_source_file, .auto);
+        try printErrMsgToStdErr(gpa, arena, parse_error, file.tree, old_source_file, .auto);
     }
     if (file.tree.errors.len != 0) {
         process.exit(1);
@@ -4108,7 +4114,7 @@ pub fn cmdChangelist(
     defer new_tree.deinit(gpa);
 
     for (new_tree.errors) |parse_error| {
-        try printErrMsgToStdErr(gpa, parse_error, new_tree, new_source_file, .auto);
+        try printErrMsgToStdErr(gpa, arena, parse_error, new_tree, new_source_file, .auto);
     }
     if (new_tree.errors.len != 0) {
         process.exit(1);
src/Module.zig
@@ -2480,11 +2480,12 @@ pub fn astGenFile(mod: *Module, file: *Scope.File) !void {
         };
         if (token_tags[parse_err.token] == .invalid) {
             const bad_off = @intCast(u32, file.tree.tokenSlice(parse_err.token).len);
+            const byte_abs = token_starts[parse_err.token] + bad_off;
             try mod.errNoteNonLazy(.{
                 .file_scope = file,
                 .parent_decl_node = 0,
-                .lazy = .{ .byte_abs = token_starts[parse_err.token] + bad_off },
-            }, err_msg, "invalid byte here", .{});
+                .lazy = .{ .byte_abs = byte_abs },
+            }, err_msg, "invalid byte: '{'}'", .{ std.zig.fmtEscapes(source[byte_abs..][0..1]) });
         }
 
         {
test/stage2/cbe.zig
@@ -591,6 +591,7 @@ pub fn addCases(ctx: *TestContext) !void {
         , &.{
             ":3:5: error: enum fields cannot be marked comptime",
             ":8:8: error: enum fields do not have types",
+            ":6:12: note: consider 'union(enum)' here to make it a tagged union",
         });
 
         // @enumToInt, @intToEnum, enum literal coercion, field access syntax, comparison, switch
test/cases.zig
@@ -1039,8 +1039,8 @@ pub fn addCases(ctx: *TestContext) !void {
             \\    var i: u32 = 10;
             \\}
         , &[_][]const u8{
-            ":3:9: error: redeclaration of 'i'",
-            ":2:9: note: previously declared here",
+            ":3:9: error: redeclaration of local variable 'i'",
+            ":2:9: note: previous declaration here",
         });
         case.addError(
             \\var testing: i64 = 10;
@@ -1061,8 +1061,8 @@ pub fn addCases(ctx: *TestContext) !void {
             \\    };
             \\}
         , &[_][]const u8{
-            ":5:19: error: redeclaration of 'c'",
-            ":4:19: note: previously declared here",
+            ":5:19: error: redeclaration of local constant 'c'",
+            ":4:19: note: previous declaration here",
         });
     }
 
@@ -1214,7 +1214,7 @@ pub fn addCases(ctx: *TestContext) !void {
             \\}
         , &[_][]const u8{
             ":2:11: error: redefinition of label 'blk'",
-            ":2:5: note: previous definition is here",
+            ":2:5: note: previous definition here",
         });
     }
 
test/compile_errors.zig
@@ -1507,7 +1507,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:28: note: invalid byte here",
+        "tmp.zig:2:28: note: invalid byte: 'a'",
     });
 
     ctx.objErrStage1("invalid exponent in float literal - 2",
@@ -1517,7 +1517,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:29: note: invalid byte here",
+        "tmp.zig:2:29: note: invalid byte: 'F'",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 1",
@@ -1527,7 +1527,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:23: note: invalid byte here",
+        "tmp.zig:2:23: note: invalid byte: '_'",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 2",
@@ -1537,7 +1537,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:23: note: invalid byte here",
+        "tmp.zig:2:23: note: invalid byte: '.'",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 3",
@@ -1547,7 +1547,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:25: note: invalid byte here",
+        "tmp.zig:2:25: note: invalid byte: ';'",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 4",
@@ -1557,7 +1557,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:25: note: invalid byte here",
+        "tmp.zig:2:25: note: invalid byte: '_'",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 5",
@@ -1567,7 +1567,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:26: note: invalid byte here",
+        "tmp.zig:2:26: note: invalid byte: '_'",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 6",
@@ -1577,7 +1577,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:26: note: invalid byte here",
+        "tmp.zig:2:26: note: invalid byte: '_'",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 7",
@@ -1587,7 +1587,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:28: note: invalid byte here",
+        "tmp.zig:2:28: note: invalid byte: ';'",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 9",
@@ -1597,7 +1597,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:23: note: invalid byte here",
+        "tmp.zig:2:23: note: invalid byte: '_'",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 10",
@@ -1607,7 +1607,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:25: note: invalid byte here",
+        "tmp.zig:2:25: note: invalid byte: '_'",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 11",
@@ -1617,7 +1617,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:28: note: invalid byte here",
+        "tmp.zig:2:28: note: invalid byte: '_'",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 12",
@@ -1627,7 +1627,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:23: note: invalid byte here",
+        "tmp.zig:2:23: note: invalid byte: 'x'",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 13",
@@ -1637,7 +1637,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:23: note: invalid byte here",
+        "tmp.zig:2:23: note: invalid byte: '_'",
     });
 
     ctx.objErrStage1("invalid underscore placement in float literal - 14",
@@ -1647,7 +1647,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:27: note: invalid byte here",
+        "tmp.zig:2:27: note: invalid byte: 'p'",
     });
 
     ctx.objErrStage1("invalid underscore placement in int literal - 1",
@@ -1657,7 +1657,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:26: note: invalid byte here",
+        "tmp.zig:2:26: note: invalid byte: ';'",
     });
 
     ctx.objErrStage1("invalid underscore placement in int literal - 2",
@@ -1667,7 +1667,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:28: note: invalid byte here",
+        "tmp.zig:2:28: note: invalid byte: ';'",
     });
 
     ctx.objErrStage1("invalid underscore placement in int literal - 3",
@@ -1677,7 +1677,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:28: note: invalid byte here",
+        "tmp.zig:2:28: note: invalid byte: ';'",
     });
 
     ctx.objErrStage1("invalid underscore placement in int literal - 4",
@@ -1687,7 +1687,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:21: error: expected expression, found 'invalid'",
-        "tmp.zig:2:28: note: invalid byte here",
+        "tmp.zig:2:28: note: invalid byte: ';'",
     });
 
     ctx.objErrStage1("comptime struct field, no init value",
@@ -4932,10 +4932,10 @@ pub fn addCases(ctx: *TestContext) !void {
     });
 
     ctx.objErrStage1("wrong number of arguments",
-        \\export fn d() void {
-        \\    e(1);
+        \\export fn a() void {
+        \\    c(1);
         \\}
-        \\fn b(a: i32, b: i32, c: i32) void { _ = a; _ = b; _ = c; }
+        \\fn c(d: i32, e: i32, f: i32) void { _ = d; _ = e; _ = f; }
     , &[_][]const u8{
         "tmp.zig:2:6: error: expected 3 argument(s), found 1",
     });
@@ -5669,7 +5669,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\b";
     , &[_][]const u8{
         "tmp.zig:1:13: error: expected expression, found 'invalid'",
-        "tmp.zig:1:15: note: invalid byte here",
+        "tmp.zig:1:15: note: invalid byte: '\\n'",
     });
 
     ctx.objErrStage1("invalid comparison for function pointers",
@@ -7569,7 +7569,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         "tmp.zig:2:15: error: expected expression, found 'invalid'",
-        "tmp.zig:2:18: note: invalid byte here",
+        "tmp.zig:2:18: note: invalid byte: '1'",
     });
 
     ctx.objErrStage1("invalid empty unicode escape",
@@ -7584,7 +7584,8 @@ pub fn addCases(ctx: *TestContext) !void {
         "fn foo() bool {\r\n" ++
         "    return true;\r\n" ++
         "}\r\n", &[_][]const u8{
-        "tmp.zig:1:1: error: invalid character: '\\xff'",
+        "tmp.zig:1:1: error: expected test, comptime, var decl, or container field, found 'invalid'",
+        "tmp.zig:1:1: note: invalid byte: '\\xff'",
     });
 
     ctx.objErrStage1("non-printable invalid character with escape alternative", "fn foo() bool {\n" ++
@@ -8769,7 +8770,7 @@ pub fn addCases(ctx: *TestContext) !void {
         \\}
     , &[_][]const u8{
         // Ideally this would be column 30 but it's not very important.
-        "tmp.zig:2:28: error: `.*` cannot be followed by `*`. Are you missing a space?",
+        "tmp.zig:2:28: error: '.*' cannot be followed by '*'. Are you missing a space?",
     });
 
     ctx.objErrStage1("Issue #9165: windows tcp server compilation error",