Commit 9ea2076663

mlugg <mlugg@mlugg.co.uk>
2023-09-18 03:05:35
translate-c: prevent variable names conflicting with type names
This introduces the concept of a "weak global name" into translate-c. translate-c consists of two passes. The first is important, because it discovers all global names, which are used to prevent naming conflicts: whenever we see an identifier in the second pass, we can mangle it if it conflicts with any global or any other in-scope identifier. Unfortunately, this is a bit tricky for structs, unions, and enums. In C, these types are not represented by normal identifers, but by separate tags - `struct foo` does not prevent an unrelated identifier `foo` existing. In general, we want to translate type names to user-friendly ones such as `struct_foo` and `foo` where possible, but we can't guarantee such names will not conflict with real variable names. This is where weak global names come in. In the initial pass, when a global type declaration is seen, `struct_foo` and `foo` are both added as weak global names. This essentially means that we will use these names for the type *if possible*, but if there is another global with the same name, we will mangle the type name instead. Then, when actually translating the declaration, we check whether there's a "true" global with a conflicting name, in which case we mangle our name. If the user-friendly alias `foo` conflicts, we do not attempt to mangle it: we just don't emit it, because a mangled alias isn't particularly helpful.
1 parent d2a9378
src/translate_c.zig
@@ -218,7 +218,7 @@ const Scope = struct {
 
         /// Check if the global scope contains the name, includes all decls that haven't been translated yet.
         fn contains(scope: *Root, name: []const u8) bool {
-            return scope.containsNow(name) or scope.context.global_names.contains(name);
+            return scope.containsNow(name) or scope.context.global_names.contains(name) or scope.context.weak_global_names.contains(name);
         }
     };
 
@@ -335,6 +335,15 @@ pub const Context = struct {
     /// up front in a pre-processing step.
     global_names: std.StringArrayHashMapUnmanaged(void) = .{},
 
+    /// This is similar to `global_names`, but contains names which we would
+    /// *like* to use, but do not strictly *have* to if they are unavailable.
+    /// These are relevant to types, which ideally we would name like
+    /// 'struct_foo' with an alias 'foo', but if either of those names is taken,
+    /// may be mangled.
+    /// This is distinct from `global_names` so we can detect at a type
+    /// declaration whether or not the name is available.
+    weak_global_names: std.StringArrayHashMapUnmanaged(void) = .{},
+
     pattern_list: PatternList,
 
     fn getMangle(c: *Context) u32 {
@@ -425,10 +434,8 @@ pub fn translate(
 
     try addMacros(&context);
     for (context.alias_list.items) |alias| {
-        if (!context.global_scope.sym_table.contains(alias.alias)) {
-            const node = try Tag.alias.create(arena, .{ .actual = alias.alias, .mangled = alias.name });
-            try addTopLevelDecl(&context, alias.alias, node);
-        }
+        const node = try Tag.alias.create(arena, .{ .actual = alias.alias, .mangled = alias.name });
+        try addTopLevelDecl(&context, alias.alias, node);
     }
 
     return ast.render(gpa, context.global_scope.nodes.items);
@@ -493,7 +500,29 @@ fn declVisitorC(context: ?*anyopaque, decl: *const clang.Decl) callconv(.C) bool
 fn declVisitorNamesOnly(c: *Context, decl: *const clang.Decl) Error!void {
     if (decl.castToNamedDecl()) |named_decl| {
         const decl_name = try c.str(named_decl.getName_bytes_begin());
-        try c.global_names.put(c.gpa, decl_name, {});
+
+        switch (decl.getKind()) {
+            .Record, .Enum => {
+                // These types are prefixed with the container kind.
+                const container_prefix = if (decl.getKind() == .Record) prefix: {
+                    const record_decl: *const clang.RecordDecl = @ptrCast(decl);
+                    if (record_decl.isUnion()) {
+                        break :prefix "union";
+                    } else {
+                        break :prefix "struct";
+                    }
+                } else "enum";
+                const prefixed_name = try std.fmt.allocPrint(c.arena, "{s}_{s}", .{ container_prefix, decl_name });
+                // `decl_name` and `prefixed_name` are the preferred names for this type.
+                // However, we can name it anything else if necessary, so these are "weak names".
+                try c.weak_global_names.ensureUnusedCapacity(c.gpa, 2);
+                c.weak_global_names.putAssumeCapacity(decl_name, {});
+                c.weak_global_names.putAssumeCapacity(prefixed_name, {});
+            },
+            else => {
+                try c.global_names.put(c.gpa, decl_name, {});
+            },
+        }
 
         // Check for typedefs with unnamed enum/record child types.
         if (decl.getKind() == .Typedef) {
@@ -1079,6 +1108,21 @@ fn flexibleArrayField(c: *Context, record_def: *const clang.RecordDecl) ?*const
     return flexible_field;
 }
 
+fn mangleWeakGlobalName(c: *Context, want_name: []const u8) ![]const u8 {
+    var cur_name = want_name;
+
+    if (!c.weak_global_names.contains(want_name)) {
+        // This type wasn't noticed by the name detection pass, so nothing has been treating this as
+        // a weak global name. We must mangle it to avoid conflicts with locals.
+        cur_name = try std.fmt.allocPrint(c.arena, "{s}_{d}", .{ want_name, c.getMangle() });
+    }
+
+    while (c.global_names.contains(cur_name)) {
+        cur_name = try std.fmt.allocPrint(c.arena, "{s}_{d}", .{ want_name, c.getMangle() });
+    }
+    return cur_name;
+}
+
 fn transRecordDecl(c: *Context, scope: *Scope, record_decl: *const clang.RecordDecl) Error!void {
     if (c.decl_table.get(@intFromPtr(record_decl.getCanonicalDecl()))) |_|
         return; // Avoid processing this decl twice
@@ -1113,6 +1157,9 @@ fn transRecordDecl(c: *Context, scope: *Scope, record_decl: *const clang.RecordD
             is_unnamed = true;
         }
         name = try std.fmt.allocPrint(c.arena, "{s}_{s}", .{ container_kind_name, bare_name });
+        if (toplevel and !is_unnamed) {
+            name = try mangleWeakGlobalName(c, name);
+        }
     }
     if (!toplevel) name = try bs.makeMangledName(c, name);
     try c.decl_table.putNoClobber(c.gpa, @intFromPtr(record_decl.getCanonicalDecl()), name);
@@ -1217,7 +1264,10 @@ fn transRecordDecl(c: *Context, scope: *Scope, record_decl: *const clang.RecordD
     const node = Node.initPayload(&payload.base);
     if (toplevel) {
         try addTopLevelDecl(c, name, node);
-        if (!is_unnamed)
+        // Only add the alias if the name is available *and* it was caught by
+        // name detection. Don't bother performing a weak mangle, since a
+        // mangled name is of no real use here.
+        if (!is_unnamed and !c.global_names.contains(bare_name) and c.weak_global_names.contains(bare_name))
             try c.alias_list.append(.{ .alias = bare_name, .name = name });
     } else {
         try scope.appendNode(node);
@@ -1246,6 +1296,9 @@ fn transEnumDecl(c: *Context, scope: *Scope, enum_decl: *const clang.EnumDecl) E
             is_unnamed = true;
         }
         name = try std.fmt.allocPrint(c.arena, "enum_{s}", .{bare_name});
+        if (toplevel and !is_unnamed) {
+            name = try mangleWeakGlobalName(c, name);
+        }
     }
     if (!toplevel) name = try bs.makeMangledName(c, name);
     try c.decl_table.putNoClobber(c.gpa, @intFromPtr(enum_decl.getCanonicalDecl()), name);
@@ -1313,7 +1366,10 @@ fn transEnumDecl(c: *Context, scope: *Scope, enum_decl: *const clang.EnumDecl) E
     const node = Node.initPayload(&payload.base);
     if (toplevel) {
         try addTopLevelDecl(c, name, node);
-        if (!is_unnamed)
+        // Only add the alias if the name is available *and* it was caught by
+        // name detection. Don't bother performing a weak mangle, since a
+        // mangled name is of no real use here.
+        if (!is_unnamed and !c.global_names.contains(bare_name) and c.weak_global_names.contains(bare_name))
             try c.alias_list.append(.{ .alias = bare_name, .name = name });
     } else {
         try scope.appendNode(node);
@@ -4881,7 +4937,7 @@ fn transType(c: *Context, scope: *Scope, ty: *const clang.Type, source_loc: clan
             var trans_scope = scope;
             if (@as(*const clang.Decl, @ptrCast(record_decl)).castToNamedDecl()) |named_decl| {
                 const decl_name = try c.str(named_decl.getName_bytes_begin());
-                if (c.global_names.get(decl_name)) |_| trans_scope = &c.global_scope.base;
+                if (c.weak_global_names.contains(decl_name)) trans_scope = &c.global_scope.base;
             }
             try transRecordDecl(c, trans_scope, record_decl);
             const name = c.decl_table.get(@intFromPtr(record_decl.getCanonicalDecl())).?;
@@ -4894,7 +4950,7 @@ fn transType(c: *Context, scope: *Scope, ty: *const clang.Type, source_loc: clan
             var trans_scope = scope;
             if (@as(*const clang.Decl, @ptrCast(enum_decl)).castToNamedDecl()) |named_decl| {
                 const decl_name = try c.str(named_decl.getName_bytes_begin());
-                if (c.global_names.get(decl_name)) |_| trans_scope = &c.global_scope.base;
+                if (c.weak_global_names.contains(decl_name)) trans_scope = &c.global_scope.base;
             }
             try transEnumDecl(c, trans_scope, enum_decl);
             const name = c.decl_table.get(@intFromPtr(enum_decl.getCanonicalDecl())).?;
test/run_translated_c.zig
@@ -1905,4 +1905,28 @@ pub fn addCases(cases: *tests.RunTranslatedCContext) void {
         \\    return 0;
         \\}
     , "");
+
+    cases.add("struct without global declaration does not conflict with local variable name",
+        \\#include <stdlib.h>
+        \\static void foo(struct foobar *unused) {}
+        \\int main(void) {
+        \\    int struct_foobar = 123;
+        \\    if (struct_foobar != 123) abort();
+        \\    int foobar = 456;
+        \\    if (foobar != 456) abort();
+        \\    return 0;
+        \\}
+    , "");
+
+    cases.add("struct without global declaration does not conflict with global variable name",
+        \\#include <stdlib.h>
+        \\static void foo(struct foobar *unused) {}
+        \\static int struct_foobar = 123;
+        \\static int foobar = 456;
+        \\int main(void) {
+        \\    if (struct_foobar != 123) abort();
+        \\    if (foobar != 456) abort();
+        \\    return 0;
+        \\}
+    , "");
 }
test/translate_c.zig
@@ -148,16 +148,16 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\} a = {};
         \\#define PTR void *
     , &[_][]const u8{
-        \\pub const struct_Bar = extern struct {
+        \\pub const struct_Bar_1 = extern struct {
         \\    a: c_int,
         \\};
         \\pub const struct_Foo = extern struct {
         \\    a: c_int,
-        \\    b: struct_Bar,
+        \\    b: struct_Bar_1,
         \\};
         \\pub export var a: struct_Foo = struct_Foo{
         \\    .a = 0,
-        \\    .b = @import("std").mem.zeroes(struct_Bar),
+        \\    .b = @import("std").mem.zeroes(struct_Bar_1),
         \\};
         ,
         \\pub const PTR = ?*anyopaque;
@@ -2361,11 +2361,11 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
             \\    struct Bar c;
             \\};
         , &[_][]const u8{
-            \\pub const struct_Bar = extern struct {
+            \\pub const struct_Bar_1 = extern struct {
             \\    b: c_int,
             \\};
             \\pub const struct_Foo = extern struct {
-            \\    c: struct_Bar,
+            \\    c: struct_Bar_1,
             \\};
         });
     }
@@ -4135,4 +4135,19 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
     , &[_][]const u8{
         \\pub const FOO = @compileError("unable to translate macro: untranslatable usage of arg `x`");
     });
+
+    cases.add("global struct whose default name conflicts with global is mangled",
+        \\struct foo {
+        \\    int x;
+        \\};
+        \\const char *struct_foo = "hello world";
+    , &[_][]const u8{
+        \\pub const struct_foo_1 = extern struct {
+        \\    x: c_int,
+        \\};
+        ,
+        \\pub const foo = struct_foo_1;
+        ,
+        \\pub export var struct_foo: [*c]const u8 = "hello world";
+    });
 }