Commit 1f84586836

Vexu <git@vexu.eu>
2019-12-14 16:00:10
translate-c-2 avoid collisions with zig keywords
1 parent 4dae70e
Changed files (3)
lib
src-self-hosted
test
lib/std/zig/tokenizer.zig
@@ -9,66 +9,77 @@ pub const Token = struct {
     pub const Keyword = struct {
         bytes: []const u8,
         id: Id,
+        hash: u32, // hash of `bytes`, filled in by `init`; getKeyword compares it before the byte-wise mem.eql
+
+        fn init(bytes: []const u8, id: Id) Keyword { // builds one table entry with its hash precomputed
+            @setEvalBranchQuota(2000); // raises the comptime branch limit; presumably needed so hashString can be evaluated while the keywords table is built
+            return .{
+                .bytes = bytes,
+                .id = id,
+                .hash = std.hash_map.hashString(bytes), // same hash function getKeyword applies to the looked-up bytes
+            };
+        }
     };
 
     pub const keywords = [_]Keyword{
-        Keyword{ .bytes = "align", .id = Id.Keyword_align },
-        Keyword{ .bytes = "allowzero", .id = Id.Keyword_allowzero },
-        Keyword{ .bytes = "and", .id = Id.Keyword_and },
-        Keyword{ .bytes = "anyframe", .id = Id.Keyword_anyframe },
-        Keyword{ .bytes = "asm", .id = Id.Keyword_asm },
-        Keyword{ .bytes = "async", .id = Id.Keyword_async },
-        Keyword{ .bytes = "await", .id = Id.Keyword_await },
-        Keyword{ .bytes = "break", .id = Id.Keyword_break },
-        Keyword{ .bytes = "catch", .id = Id.Keyword_catch },
-        Keyword{ .bytes = "comptime", .id = Id.Keyword_comptime },
-        Keyword{ .bytes = "const", .id = Id.Keyword_const },
-        Keyword{ .bytes = "continue", .id = Id.Keyword_continue },
-        Keyword{ .bytes = "defer", .id = Id.Keyword_defer },
-        Keyword{ .bytes = "else", .id = Id.Keyword_else },
-        Keyword{ .bytes = "enum", .id = Id.Keyword_enum },
-        Keyword{ .bytes = "errdefer", .id = Id.Keyword_errdefer },
-        Keyword{ .bytes = "error", .id = Id.Keyword_error },
-        Keyword{ .bytes = "export", .id = Id.Keyword_export },
-        Keyword{ .bytes = "extern", .id = Id.Keyword_extern },
-        Keyword{ .bytes = "false", .id = Id.Keyword_false },
-        Keyword{ .bytes = "fn", .id = Id.Keyword_fn },
-        Keyword{ .bytes = "for", .id = Id.Keyword_for },
-        Keyword{ .bytes = "if", .id = Id.Keyword_if },
-        Keyword{ .bytes = "inline", .id = Id.Keyword_inline },
-        Keyword{ .bytes = "nakedcc", .id = Id.Keyword_nakedcc },
-        Keyword{ .bytes = "noalias", .id = Id.Keyword_noalias },
-        Keyword{ .bytes = "noasync", .id = Id.Keyword_noasync },
-        Keyword{ .bytes = "noinline", .id = Id.Keyword_noinline },
-        Keyword{ .bytes = "null", .id = Id.Keyword_null },
-        Keyword{ .bytes = "or", .id = Id.Keyword_or },
-        Keyword{ .bytes = "orelse", .id = Id.Keyword_orelse },
-        Keyword{ .bytes = "packed", .id = Id.Keyword_packed },
-        Keyword{ .bytes = "pub", .id = Id.Keyword_pub },
-        Keyword{ .bytes = "resume", .id = Id.Keyword_resume },
-        Keyword{ .bytes = "return", .id = Id.Keyword_return },
-        Keyword{ .bytes = "linksection", .id = Id.Keyword_linksection },
-        Keyword{ .bytes = "stdcallcc", .id = Id.Keyword_stdcallcc },
-        Keyword{ .bytes = "struct", .id = Id.Keyword_struct },
-        Keyword{ .bytes = "suspend", .id = Id.Keyword_suspend },
-        Keyword{ .bytes = "switch", .id = Id.Keyword_switch },
-        Keyword{ .bytes = "test", .id = Id.Keyword_test },
-        Keyword{ .bytes = "threadlocal", .id = Id.Keyword_threadlocal },
-        Keyword{ .bytes = "true", .id = Id.Keyword_true },
-        Keyword{ .bytes = "try", .id = Id.Keyword_try },
-        Keyword{ .bytes = "undefined", .id = Id.Keyword_undefined },
-        Keyword{ .bytes = "union", .id = Id.Keyword_union },
-        Keyword{ .bytes = "unreachable", .id = Id.Keyword_unreachable },
-        Keyword{ .bytes = "usingnamespace", .id = Id.Keyword_usingnamespace },
-        Keyword{ .bytes = "var", .id = Id.Keyword_var },
-        Keyword{ .bytes = "volatile", .id = Id.Keyword_volatile },
-        Keyword{ .bytes = "while", .id = Id.Keyword_while },
+        Keyword.init("align", .Keyword_align), // entries are now built through Keyword.init so each one carries its hash
+        Keyword.init("allowzero", .Keyword_allowzero),
+        Keyword.init("and", .Keyword_and),
+        Keyword.init("anyframe", .Keyword_anyframe),
+        Keyword.init("asm", .Keyword_asm),
+        Keyword.init("async", .Keyword_async),
+        Keyword.init("await", .Keyword_await),
+        Keyword.init("break", .Keyword_break),
+        Keyword.init("catch", .Keyword_catch),
+        Keyword.init("comptime", .Keyword_comptime),
+        Keyword.init("const", .Keyword_const),
+        Keyword.init("continue", .Keyword_continue),
+        Keyword.init("defer", .Keyword_defer),
+        Keyword.init("else", .Keyword_else),
+        Keyword.init("enum", .Keyword_enum),
+        Keyword.init("errdefer", .Keyword_errdefer),
+        Keyword.init("error", .Keyword_error),
+        Keyword.init("export", .Keyword_export),
+        Keyword.init("extern", .Keyword_extern),
+        Keyword.init("false", .Keyword_false),
+        Keyword.init("fn", .Keyword_fn),
+        Keyword.init("for", .Keyword_for),
+        Keyword.init("if", .Keyword_if),
+        Keyword.init("inline", .Keyword_inline),
+        Keyword.init("nakedcc", .Keyword_nakedcc),
+        Keyword.init("noalias", .Keyword_noalias),
+        Keyword.init("noasync", .Keyword_noasync),
+        Keyword.init("noinline", .Keyword_noinline),
+        Keyword.init("null", .Keyword_null),
+        Keyword.init("or", .Keyword_or),
+        Keyword.init("orelse", .Keyword_orelse),
+        Keyword.init("packed", .Keyword_packed),
+        Keyword.init("pub", .Keyword_pub),
+        Keyword.init("resume", .Keyword_resume),
+        Keyword.init("return", .Keyword_return),
+        Keyword.init("linksection", .Keyword_linksection), // not in alphabetical position; harmless because getKeyword scans the table linearly
+        Keyword.init("stdcallcc", .Keyword_stdcallcc),
+        Keyword.init("struct", .Keyword_struct),
+        Keyword.init("suspend", .Keyword_suspend),
+        Keyword.init("switch", .Keyword_switch),
+        Keyword.init("test", .Keyword_test),
+        Keyword.init("threadlocal", .Keyword_threadlocal),
+        Keyword.init("true", .Keyword_true),
+        Keyword.init("try", .Keyword_try),
+        Keyword.init("undefined", .Keyword_undefined),
+        Keyword.init("union", .Keyword_union),
+        Keyword.init("unreachable", .Keyword_unreachable),
+        Keyword.init("usingnamespace", .Keyword_usingnamespace),
+        Keyword.init("var", .Keyword_var),
+        Keyword.init("volatile", .Keyword_volatile),
+        Keyword.init("while", .Keyword_while),
     };
 
     // TODO perfect hash at comptime
-    fn getKeyword(bytes: []const u8) ?Id {
+    pub fn getKeyword(bytes: []const u8) ?Id {
+        var hash = std.hash_map.hashString(bytes);
         for (keywords) |kw| {
-            if (mem.eql(u8, kw.bytes, bytes)) {
+            if (kw.hash == hash and mem.eql(u8, kw.bytes, bytes)) {
                 return kw.id;
             }
         }
src-self-hosted/translate_c.zig
@@ -348,7 +348,7 @@ fn visitVarDecl(c: *Context, var_decl: *const ZigClangVarDecl) Error!void {
     else
         try appendToken(c, .Keyword_var, "var");
 
-    const name_tok = try appendToken(c, .Identifier, var_name);
+    const name_tok = try appendIdentifier(c, var_name);
 
     _ = try appendToken(c, .Colon, ":");
     const type_node = transQualType(rp, qual_type, var_decl_loc) catch |err| switch (err) {
@@ -407,7 +407,7 @@ fn resolveTypeDef(c: *Context, typedef_decl: *const ZigClangTypedefNameDecl) Err
     const const_tok = try appendToken(c, .Keyword_const, "const");
 
     const typedef_name = try c.str(ZigClangDecl_getName_bytes_begin(@ptrCast(*const ZigClangDecl, typedef_decl)));
-    const name_tok = try appendToken(c, .Identifier, typedef_name);
+    const name_tok = try appendIdentifier(c, typedef_name);
     const eq_tok = try appendToken(c, .Equal, "=");
 
     const child_qt = ZigClangTypedefNameDecl_getUnderlyingType(typedef_decl);
@@ -460,7 +460,7 @@ fn resolveRecordDecl(c: *Context, record_decl: *const ZigClangRecordDecl) Error!
     const const_tok = try appendToken(c, .Keyword_const, "const");
 
     const name = try std.fmt.allocPrint(c.a(), "{}_{}", .{ container_kind_name, bare_name });
-    const name_tok = try appendToken(c, .Identifier, name);
+    const name_tok = try appendIdentifier(c, name);
 
     const eq_tok = try appendToken(c, .Equal, "=");
     const init_node = transRecordDecl(c, record_decl) catch |err| switch (err) {
@@ -497,10 +497,10 @@ fn resolveRecordDecl(c: *Context, record_decl: *const ZigClangRecordDecl) Error!
 fn createAlias(c: *Context, alias: var) !void {
     const visib_tok = try appendToken(c, .Keyword_pub, "pub");
     const mut_tok = try appendToken(c, .Keyword_const, "const");
-    const name_tok = try appendToken(c, .Identifier, alias.alias);
+    const name_tok = try appendIdentifier(c, alias.alias);
 
     const eq_tok = try appendToken(c, .Equal, "=");
-    const init_node = try appendIdentifier(c, alias.name);
+    const init_node = try transCreateNodeIdentifier(c, alias.name);
 
     const node = try c.a().create(ast.Node.VarDecl);
     node.* = ast.Node.VarDecl{
@@ -787,7 +787,7 @@ fn transDeclStmt(rp: RestorePoint, parent_scope: *Scope, stmt: *const ZigClangDe
                 const c_name = try c.str(ZigClangDecl_getName_bytes_begin(
                     @ptrCast(*const ZigClangDecl, var_decl),
                 ));
-                const name_token = try appendToken(c, .Identifier, c_name);
+                const name_token = try appendIdentifier(c, c_name);
 
                 const var_scope = try c.a().create(Scope.Var);
                 var_scope.* = Scope.Var{
@@ -856,7 +856,7 @@ fn transDeclRefExpr(
     const c_name = try rp.c.str(ZigClangDecl_getName_bytes_begin(@ptrCast(*const ZigClangDecl, value_decl)));
     const zig_name = transLookupZigIdentifier(scope, c_name);
     if (lrvalue == .l_value) try rp.c.ptr_params.put(zig_name);
-    const node = try appendIdentifier(rp.c, zig_name);
+    const node = try transCreateNodeIdentifier(rp.c, zig_name);
     return TransResult{
         .node = node,
         .node_scope = scope,
@@ -1292,7 +1292,7 @@ fn maybeSuppressResult(
     if (used == .used) return result;
     // NOTE: This is backwards, but the semicolon must immediately follow the node.
     _ = try appendToken(rp.c, .Semicolon, ";");
-    const lhs = try appendIdentifier(rp.c, "_");
+    const lhs = try transCreateNodeIdentifier(rp.c, "_");
     const op_token = try appendToken(rp.c, .Equal, "=");
     const op_node = try rp.c.a().create(ast.Node.InfixOp);
     op_node.* = ast.Node.InfixOp{
@@ -1374,7 +1374,7 @@ fn transRecordDecl(c: *Context, record_decl: *const ZigClangRecordDecl) TypeErro
             return node;
         }
 
-        const field_name = try appendToken(c, .Identifier, try c.str(ZigClangDecl_getName_bytes_begin(@ptrCast(*const ZigClangDecl, field_decl))));
+        const field_name = try appendIdentifier(c, try c.str(ZigClangDecl_getName_bytes_begin(@ptrCast(*const ZigClangDecl, field_decl))));
         _ = try appendToken(c, .Colon, ":");
         const field_type = try transQualType(rp, ZigClangFieldDecl_getType(field_decl), field_loc);
 
@@ -1644,7 +1644,7 @@ fn transCreateNodePtrType(
         .Identifier => blk: {
             const lbracket = try appendToken(c, .LBracket, "["); // Rendering checks if this token + 2 == .Identifier, so needs to return this token
             _ = try appendToken(c, .Asterisk, "*");
-            _ = try appendToken(c, .Identifier, "c");
+            _ = try appendIdentifier(c, "c");
             _ = try appendToken(c, .RBracket, "]");
             break :blk lbracket;
         },
@@ -1793,25 +1793,25 @@ fn transType(rp: RestorePoint, ty: *const ZigClangType, source_loc: ZigClangSour
         .Builtin => {
             const builtin_ty = @ptrCast(*const ZigClangBuiltinType, ty);
             switch (ZigClangBuiltinType_getKind(builtin_ty)) {
-                .Void => return appendIdentifier(rp.c, "c_void"),
-                .Bool => return appendIdentifier(rp.c, "bool"),
-                .Char_U, .UChar, .Char_S, .Char8 => return appendIdentifier(rp.c, "u8"),
-                .SChar => return appendIdentifier(rp.c, "i8"),
-                .UShort => return appendIdentifier(rp.c, "c_ushort"),
-                .UInt => return appendIdentifier(rp.c, "c_uint"),
-                .ULong => return appendIdentifier(rp.c, "c_ulong"),
-                .ULongLong => return appendIdentifier(rp.c, "c_ulonglong"),
-                .Short => return appendIdentifier(rp.c, "c_short"),
-                .Int => return appendIdentifier(rp.c, "c_int"),
-                .Long => return appendIdentifier(rp.c, "c_long"),
-                .LongLong => return appendIdentifier(rp.c, "c_longlong"),
-                .UInt128 => return appendIdentifier(rp.c, "u128"),
-                .Int128 => return appendIdentifier(rp.c, "i128"),
-                .Float => return appendIdentifier(rp.c, "f32"),
-                .Double => return appendIdentifier(rp.c, "f64"),
-                .Float128 => return appendIdentifier(rp.c, "f128"),
-                .Float16 => return appendIdentifier(rp.c, "f16"),
-                .LongDouble => return appendIdentifier(rp.c, "c_longdouble"),
+                .Void => return transCreateNodeIdentifier(rp.c, "c_void"),
+                .Bool => return transCreateNodeIdentifier(rp.c, "bool"),
+                .Char_U, .UChar, .Char_S, .Char8 => return transCreateNodeIdentifier(rp.c, "u8"),
+                .SChar => return transCreateNodeIdentifier(rp.c, "i8"),
+                .UShort => return transCreateNodeIdentifier(rp.c, "c_ushort"),
+                .UInt => return transCreateNodeIdentifier(rp.c, "c_uint"),
+                .ULong => return transCreateNodeIdentifier(rp.c, "c_ulong"),
+                .ULongLong => return transCreateNodeIdentifier(rp.c, "c_ulonglong"),
+                .Short => return transCreateNodeIdentifier(rp.c, "c_short"),
+                .Int => return transCreateNodeIdentifier(rp.c, "c_int"),
+                .Long => return transCreateNodeIdentifier(rp.c, "c_long"),
+                .LongLong => return transCreateNodeIdentifier(rp.c, "c_longlong"),
+                .UInt128 => return transCreateNodeIdentifier(rp.c, "u128"),
+                .Int128 => return transCreateNodeIdentifier(rp.c, "i128"),
+                .Float => return transCreateNodeIdentifier(rp.c, "f32"),
+                .Double => return transCreateNodeIdentifier(rp.c, "f64"),
+                .Float128 => return transCreateNodeIdentifier(rp.c, "f128"),
+                .Float16 => return transCreateNodeIdentifier(rp.c, "f16"),
+                .LongDouble => return transCreateNodeIdentifier(rp.c, "c_longdouble"),
                 else => return revertAndWarn(rp, error.UnsupportedType, source_loc, "unsupported builtin type", .{}),
             }
         },
@@ -1891,14 +1891,14 @@ fn transType(rp: RestorePoint, ty: *const ZigClangType, source_loc: ZigClangSour
 
             const typedef_decl = ZigClangTypedefType_getDecl(typedef_ty);
             const typedef_name = try rp.c.str(ZigClangDecl_getName_bytes_begin(@ptrCast(*const ZigClangDecl, typedef_decl)));
-            return appendIdentifier(rp.c, typedef_name);
+            return transCreateNodeIdentifier(rp.c, typedef_name);
         },
         .Record => {
             const record_ty = @ptrCast(*const ZigClangRecordType, ty);
 
             const record_decl = ZigClangRecordType_getDecl(record_ty);
-            if (try getContainerName(rp.c, record_decl)) |name|
-                return appendIdentifier(rp.c, name)
+            if (try getContainerName(rp, record_decl)) |name|
+                return transCreateNodeIdentifier(rp.c, name)
             else
                 return transRecordDecl(rp.c, record_decl);
         },
@@ -1913,22 +1913,20 @@ fn transType(rp: RestorePoint, ty: *const ZigClangType, source_loc: ZigClangSour
     }
 }
 
-fn getContainerName(c: *Context, record_decl: *const ZigClangRecordDecl) !?[]const u8 {
-    const bare_name = try c.str(ZigClangDecl_getName_bytes_begin(@ptrCast(*const ZigClangDecl, record_decl)));
+fn getContainerName(rp: RestorePoint, record_decl: *const ZigClangRecordDecl) !?[]const u8 { // now takes a RestorePoint instead of the bare Context so the failure path can call revertAndWarn
+    const bare_name = try rp.c.str(ZigClangDecl_getName_bytes_begin(@ptrCast(*const ZigClangDecl, record_decl)));
 
     const container_kind_name = if (ZigClangRecordDecl_isUnion(record_decl))
         "union"
     else if (ZigClangRecordDecl_isStruct(record_decl))
         "struct"
-    else {
-        try emitWarning(c, ZigClangRecordDecl_getLocation(record_decl), "record {} is not a struct or union", .{bare_name});
-        return null;
-    };
+    else
+        return revertAndWarn(rp, error.UnsupportedType, ZigClangRecordDecl_getLocation(record_decl), "record {} is not a struct or union", .{bare_name}); // previously warned and returned null; now returns whatever revertAndWarn yields for error.UnsupportedType
 
     if (ZigClangRecordDecl_isAnonymousStructOrUnion(record_decl) or bare_name.len == 0)
         return null;
 
-    return try std.fmt.allocPrint(c.a(), "{}_{}", .{ container_kind_name, bare_name });
+    return try std.fmt.allocPrint(rp.c.a(), "{}_{}", .{ container_kind_name, bare_name }); // named record: "<kind>_<bare_name>", allocated with rp.c.a()
 }
 
 fn isCVoid(qt: ZigClangQualType) bool {
@@ -2020,7 +2018,7 @@ fn finishTransFnProto(
     else
         null;
     const fn_tok = try appendToken(rp.c, .Keyword_fn, "fn");
-    const name_tok = if (fn_decl_context) |ctx| try appendToken(rp.c, .Identifier, ctx.fn_name) else null;
+    const name_tok = if (fn_decl_context) |ctx| try appendIdentifier(rp.c, ctx.fn_name) else null;
     const lparen_tok = try appendToken(rp.c, .LParen, "(");
 
     var fn_params = ast.Node.FnProto.ParamList.init(rp.c.a());
@@ -2038,7 +2036,7 @@ fn finishTransFnProto(
                 const param_name = try rp.c.str(ZigClangDecl_getName_bytes_begin(@ptrCast(*const ZigClangDecl, param)));
                 if (param_name.len > 0) {
                     // TODO: If len == 0, auto-generate arg1, arg2, etc? Or leave the name blank?
-                    const result = try appendToken(rp.c, .Identifier, param_name);
+                    const result = try appendIdentifier(rp.c, param_name);
                     _ = try appendToken(rp.c, .Colon, ":");
                     break :blk result;
                 }
@@ -2087,12 +2085,12 @@ fn finishTransFnProto(
 
     const return_type_node = blk: {
         if (ZigClangFunctionType_getNoReturnAttr(fn_ty)) {
-            break :blk try appendIdentifier(rp.c, "noreturn");
+            break :blk try transCreateNodeIdentifier(rp.c, "noreturn");
         } else {
             const return_qt = ZigClangFunctionType_getReturnType(fn_ty);
             if (isCVoid(return_qt)) {
                 // convert primitive c_void to actual void (only for return type)
-                break :blk try appendIdentifier(rp.c, "void");
+                break :blk try transCreateNodeIdentifier(rp.c, "void");
             } else {
                 break :blk transQualType(rp, return_qt, source_loc) catch |err| switch (err) {
                     error.UnsupportedType => {
@@ -2145,7 +2143,7 @@ fn emitWarning(c: *Context, loc: ZigClangSourceLocation, comptime format: []cons
 fn failDecl(c: *Context, loc: ZigClangSourceLocation, name: []const u8, comptime format: []const u8, args: var) !void {
     // const name = @compileError(msg);
     const const_tok = try appendToken(c, .Keyword_const, "const");
-    const name_tok = try appendToken(c, .Identifier, name);
+    const name_tok = try appendIdentifier(c, name);
     const eq_tok = try appendToken(c, .Equal, "=");
     const builtin_tok = try appendToken(c, .Builtin, "@compileError");
     const lparen_tok = try appendToken(c, .LParen, "(");
@@ -2190,6 +2188,7 @@ fn failDecl(c: *Context, loc: ZigClangSourceLocation, name: []const u8, comptime
 }
 
 fn appendToken(c: *Context, token_id: Token.Id, bytes: []const u8) !ast.TokenIndex {
+    std.debug.assert(token_id != .Identifier); // identifiers must go through appendIdentifier, which escapes names that collide with Zig keywords
     return appendTokenFmt(c, token_id, "{}", .{bytes});
 }
 
@@ -2218,8 +2217,16 @@ fn appendTokenFmt(c: *Context, token_id: Token.Id, comptime format: []const u8,
     return token_index;
 }
 
-fn appendIdentifier(c: *Context, name: []const u8) !*ast.Node {
-    const token_index = try appendToken(c, .Identifier, name);
+fn appendIdentifier(c: *Context, name: []const u8) !ast.TokenIndex { // appends `name` as an .Identifier token and returns the index appendTokenFmt reports
+    if (std.zig.Token.getKeyword(name)) |_| {
+        return appendTokenFmt(c, .Identifier, "@\"{}\"", .{name}); // name is a Zig keyword: emit it in @"..." form so it is not tokenized as the keyword
+    } else {
+        return appendTokenFmt(c, .Identifier, "{}", .{name}); // not a keyword: emit the name verbatim
+    }
+}
+
+fn transCreateNodeIdentifier(c: *Context, name: []const u8) !*ast.Node {
+    const token_index = try appendIdentifier(c, name);
     const identifier = try c.a().create(ast.Node.Identifier);
     identifier.* = ast.Node.Identifier{
         .base = ast.Node{ .id = ast.Node.Id.Identifier },
test/translate_c.zig
@@ -686,7 +686,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\pub const SDL_INIT_VIDEO = @as(c_ulonglong, 32);
     });
 
-    cases.add("zig keywords in C code",
+    cases.add_both("zig keywords in C code",
         \\struct comptime {
         \\    int defer;
         \\};