Commit 57b2176e28

Evan Haas <evan@lagerdata.com>
2021-01-20 08:42:48
translate-c: Improve array support
1. For incomplete arrays with an initializer list (`int x[] = {1};`), use the initializer size as the array size. 2. For arrays initialized with a string literal, translate it as an array of character literals instead of `[*c]const u8`. 3. Don't crash if an empty initializer is used for an incomplete array. 4. Add a test for multi-character character constants. Additionally, lay some groundwork for supporting wide string literals. Fixes #4831, #7832, #7842.
1 parent 68a040a
src/clang.zig
@@ -735,6 +735,15 @@ pub const StringLiteral = opaque {
     pub const getKind = ZigClangStringLiteral_getKind;
     extern fn ZigClangStringLiteral_getKind(*const StringLiteral) StringLiteral_StringKind;
 
+    pub const getCodeUnit = ZigClangStringLiteral_getCodeUnit;
+    extern fn ZigClangStringLiteral_getCodeUnit(*const StringLiteral, usize) u32; // second argument is the code-unit index
+
+    pub const getLength = ZigClangStringLiteral_getLength;
+    extern fn ZigClangStringLiteral_getLength(*const StringLiteral) c_uint; // forwards to clang::StringLiteral::getLength
+
+    pub const getCharByteWidth = ZigClangStringLiteral_getCharByteWidth;
+    extern fn ZigClangStringLiteral_getCharByteWidth(*const StringLiteral) c_uint; // 1 is treated as "narrow" by translate_c
+
     pub const getString_bytes_begin_size = ZigClangStringLiteral_getString_bytes_begin_size;
     extern fn ZigClangStringLiteral_getString_bytes_begin_size(*const StringLiteral, *usize) [*]const u8;
 };
src/translate_c.zig
@@ -710,6 +710,12 @@ fn visitFnDecl(c: *Context, fn_decl: *const clang.FunctionDecl) Error!void {
     return addTopLevelDecl(c, fn_name, &proto_node.base);
 }
 
+/// Translate `qt`, routing through `transQualTypeInitialized` when the
+/// declaration has an initializer and through `transQualType` otherwise.
+fn transQualTypeMaybeInitialized(rp: RestorePoint, qt: clang.QualType, decl_init: ?*const clang.Expr, loc: clang.SourceLocation) TransError!*ast.Node {
+    const init_expr = decl_init orelse return transQualType(rp, qt, loc);
+    return transQualTypeInitialized(rp, qt, init_expr, loc);
+}
 /// if mangled_name is not null, this var decl was declared in a block scope.
 fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]const u8) Error!void {
     const var_name = mangled_name orelse try c.str(@ptrCast(*const clang.NamedDecl, var_decl).getName_bytes_begin());
@@ -734,6 +740,7 @@ fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]co
     const storage_class = var_decl.getStorageClass();
     const is_const = qual_type.isConstQualified();
     const has_init = var_decl.hasInit();
+    const decl_init = var_decl.getInit();
 
     // In C extern variables with initializers behave like Zig exports.
     // extern int foo = 2;
@@ -755,8 +762,9 @@ fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]co
     const name_tok = try appendIdentifier(c, checked_name);
 
     _ = try appendToken(c, .Colon, ":");
-    const type_node = transQualType(rp, qual_type, var_decl_loc) catch |err| switch (err) {
-        error.UnsupportedType => {
+
+    const type_node = transQualTypeMaybeInitialized(rp, qual_type, decl_init, var_decl_loc) catch |err| switch (err) {
+        error.UnsupportedTranslation, error.UnsupportedType => {
             return failDecl(c, var_decl_loc, checked_name, "unable to resolve variable type", .{});
         },
         error.OutOfMemory => |e| return e,
@@ -770,17 +778,22 @@ fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]co
     // with the variable type.
     if (has_init) {
         eq_tok = try appendToken(c, .Equal, "=");
-        init_node = if (var_decl.getInit()) |expr|
-            transExprCoercing(rp, &c.global_scope.base, expr, .used, .r_value) catch |err| switch (err) {
+        if (decl_init) |expr| {
+            const node_or_error = if (expr.getStmtClass() == .StringLiteralClass)
+                transStringLiteralAsArray(rp, &c.global_scope.base, @ptrCast(*const clang.StringLiteral, expr), type_node)
+            else
+                transExprCoercing(rp, scope, expr, .used, .r_value);
+            init_node = node_or_error catch |err| switch (err) {
                 error.UnsupportedTranslation,
                 error.UnsupportedType,
                 => {
                     return failDecl(c, var_decl_loc, checked_name, "unable to translate initializer", .{});
                 },
                 error.OutOfMemory => |e| return e,
-            }
-        else
-            try transCreateNodeUndefinedLiteral(c);
+            };
+        } else {
+            init_node = try transCreateNodeUndefinedLiteral(c);
+        }
     } else if (storage_class != .Extern) {
         eq_tok = try appendToken(c, .Equal, "=");
         // The C language specification states that variables with static or threadlocal
@@ -1620,6 +1633,7 @@ fn transDeclStmtOne(
     switch (decl.getKind()) {
         .Var => {
             const var_decl = @ptrCast(*const clang.VarDecl, decl);
+            const decl_init = var_decl.getInit();
 
             const qual_type = var_decl.getTypeSourceInfo_getType();
             const name = try c.str(@ptrCast(*const clang.NamedDecl, var_decl).getName_bytes_begin());
@@ -1643,11 +1657,14 @@ fn transDeclStmtOne(
 
             _ = try appendToken(c, .Colon, ":");
             const loc = decl.getLocation();
-            const type_node = try transQualType(rp, qual_type, loc);
+            const type_node = try transQualTypeMaybeInitialized(rp, qual_type, decl_init, loc);
 
             const eq_token = try appendToken(c, .Equal, "=");
-            var init_node = if (var_decl.getInit()) |expr|
-                try transExprCoercing(rp, scope, expr, .used, .r_value)
+            var init_node = if (decl_init) |expr|
+                if (expr.getStmtClass() == .StringLiteralClass)
+                    try transStringLiteralAsArray(rp, scope, @ptrCast(*const clang.StringLiteral, expr), type_node)
+                else
+                    try transExprCoercing(rp, scope, expr, .used, .r_value)
             else
                 try transCreateNodeUndefinedLiteral(c);
             if (!qualTypeIsBoolean(qual_type) and isBoolRes(init_node)) {
@@ -1740,7 +1757,7 @@ fn transImplicitCastExpr(
             return maybeSuppressResult(rp, scope, result_used, sub_expr_node);
         },
         .ArrayToPointerDecay => {
-            if (exprIsStringLiteral(sub_expr)) {
+            if (exprIsNarrowStringLiteral(sub_expr)) {
                 const sub_expr_node = try transExpr(rp, scope, sub_expr, .used, .r_value);
                 return maybeSuppressResult(rp, scope, result_used, sub_expr_node);
             }
@@ -1841,17 +1858,20 @@ fn exprIsBooleanType(expr: *const clang.Expr) bool {
     return qualTypeIsBoolean(expr.getType());
 }
 
-fn exprIsStringLiteral(expr: *const clang.Expr) bool {
+fn exprIsNarrowStringLiteral(expr: *const clang.Expr) bool {
     switch (expr.getStmtClass()) {
-        .StringLiteralClass => return true,
+        .StringLiteralClass => {
+            const string_lit = @ptrCast(*const clang.StringLiteral, expr);
+            return string_lit.getCharByteWidth() == 1;
+        },
         .PredefinedExprClass => return true,
         .UnaryOperatorClass => {
             const op_expr = @ptrCast(*const clang.UnaryOperator, expr).getSubExpr();
-            return exprIsStringLiteral(op_expr);
+            return exprIsNarrowStringLiteral(op_expr);
         },
         .ParenExprClass => {
             const op_expr = @ptrCast(*const clang.ParenExpr, expr).getSubExpr();
-            return exprIsStringLiteral(op_expr);
+            return exprIsNarrowStringLiteral(op_expr);
         },
         else => return false,
     }
@@ -2049,6 +2069,71 @@ fn transStringLiteral(
     }
 }
 
+/// Recover an array length from an already-translated type node.
+/// Fails with `error.UnsupportedTranslation` unless `node` is an `ArrayType`
+/// whose length expression is an `IntegerLiteral` token that parses as a
+/// base-10 `usize`.
+fn zigArraySize(c: *Context, node: *ast.Node) TransError!usize {
+    const array = node.castTag(.ArrayType) orelse return error.UnsupportedTranslation;
+    const int_lit = array.len_expr.castTag(.IntegerLiteral) orelse return error.UnsupportedTranslation;
+    const digits = tokenSlice(c, int_lit.token);
+    return std.fmt.parseUnsigned(usize, digits, 10) catch error.UnsupportedTranslation;
+}
+
+/// Build an array-initializer node from a C string literal that initializes
+/// an array. `target_node` is the already-translated type of the array being
+/// initialized and supplies the element count. Code units beyond that count
+/// are dropped; when the literal is shorter, the remaining elements are
+/// emitted as the integer literal 0.
+fn transStringLiteralAsArray(
+    rp: RestorePoint,
+    scope: *Scope,
+    stmt: *const clang.StringLiteral,
+    target_node: *ast.Node,
+) TransError!*ast.Node {
+    const c = rp.c;
+    const elem_count = try zigArraySize(c, target_node);
+    const literal_len = stmt.getLength();
+
+    // The element type is taken from the literal's own type, not from `target_node`.
+    const lit_expr = @ptrCast(*const clang.Expr, stmt);
+    const lit_array_ty = @ptrCast(*const clang.ConstantArrayType, lit_expr.getType().getTypePtr());
+
+    // Tokens are appended in source order: `[N]`, element type, `{`, items, `}`.
+    const array_ty_node = try c.arena.create(ast.Node.ArrayType);
+    const lbracket = try appendToken(c, .LBracket, "[");
+    const len_node = try transCreateNodeInt(c, elem_count);
+    _ = try appendToken(c, .RBracket, "]");
+    const elem_ty_node = try transQualType(rp, lit_array_ty.getElementType(), lit_expr.getBeginLoc());
+    array_ty_node.* = .{
+        .op_token = lbracket,
+        .rhs = elem_ty_node,
+        .len_expr = len_node,
+    };
+
+    _ = try appendToken(c, .LBrace, "{");
+    var result = try ast.Node.ArrayInitializer.alloc(c.arena, elem_count);
+    result.* = .{
+        .lhs = &array_ty_node.base,
+        .rtoken = undefined,
+        .list_len = elem_count,
+    };
+    const items = result.list();
+
+    const kind = stmt.getKind();
+    const is_narrow = kind == .Ascii or kind == .UTF8;
+    var idx: c_uint = 0;
+    while (idx < elem_count) : (idx += 1) {
+        items[idx] = if (idx < literal_len)
+            try transCreateCharLitNode(c, is_narrow, stmt.getCodeUnit(idx))
+        else
+            try transCreateNodeInt(c, 0);
+        _ = try appendToken(c, .Comma, ",");
+    }
+    result.rtoken = try appendToken(c, .RBrace, "}");
+    return &result.base;
+}
+
 fn cIsEnum(qt: clang.QualType) bool {
     return qt.getCanonicalType().getTypeClass() == .Enum;
 }
@@ -2343,6 +2428,18 @@ fn transCreateNodeArrayType(
     return &node.base;
 }
 
+/// Produce the initializer for a zero-length array with element type `ty`:
+/// an array type node of length 0 (built by `transCreateNodeArrayType`)
+/// followed by an empty `{}` initializer list.
+fn transCreateEmptyArray(rp: RestorePoint, loc: clang.SourceLocation, ty: *const clang.Type) TransError!*ast.Node {
+    const array_ty = try transCreateNodeArrayType(rp, loc, ty, 0);
+    _ = try appendToken(rp.c, .LBrace, "{");
+    const empty_init = try ast.Node.ArrayInitializer.alloc(rp.c.arena, 0);
+    const rbrace = try appendToken(rp.c, .RBrace, "}");
+    empty_init.* = .{ .lhs = array_ty, .rtoken = rbrace, .list_len = 0 };
+    return &empty_init.base;
+}
+
 fn transInitListExprArray(
     rp: RestorePoint,
     scope: *Scope,
@@ -2360,6 +2457,10 @@ fn transInitListExprArray(
     const all_count = size_ap_int.getLimitedValue(math.maxInt(usize));
     const leftover_count = all_count - init_count;
 
+    if (all_count == 0) {
+        return transCreateEmptyArray(rp, loc, child_qt.getTypePtr());
+    }
+
     var init_node: *ast.Node.ArrayInitializer = undefined;
     var cat_tok: ast.TokenIndex = undefined;
     if (init_count != 0) {
@@ -2934,6 +3035,21 @@ fn transPredefinedExpr(rp: RestorePoint, scope: *Scope, expr: *const clang.Prede
     return transStringLiteral(rp, scope, expr.getFunctionName(), used);
 }
 
+/// Create a `CharLiteral` node for the code unit `val`. Narrow values are
+/// rendered as one escaped byte; all others as a `'\u{...}'` hex escape.
+fn transCreateCharLitNode(c: *Context, narrow: bool, val: u32) TransError!*ast.Node {
+    const node = try c.arena.create(ast.Node.OneToken);
+    const token = if (narrow) blk: {
+        const single_byte = [_]u8{@intCast(u8, val)};
+        break :blk try appendTokenFmt(c, .CharLiteral, "'{}'", .{std.zig.fmtEscapes(&single_byte)});
+    } else try appendTokenFmt(c, .CharLiteral, "'\\u{{{x}}}'", .{val});
+    node.* = .{
+        .base = .{ .tag = .CharLiteral },
+        .token = token,
+    };
+    return &node.base;
+}
+
 fn transCharLiteral(
     rp: RestorePoint,
     scope: *Scope,
@@ -2943,33 +3059,14 @@ fn transCharLiteral(
 ) TransError!*ast.Node {
     const kind = stmt.getKind();
     const val = stmt.getValue();
-    const int_lit_node = switch (kind) {
-        .Ascii, .UTF8 => blk: {
-            if (kind == .Ascii) {
-                // C has a somewhat obscure feature called multi-character character
-                // constant
-                if (val > 255)
-                    break :blk try transCreateNodeInt(rp.c, val);
-            }
-            const val_array = [_]u8 { @intCast(u8, val) };
-            const token = try appendTokenFmt(rp.c, .CharLiteral, "'{}'", .{std.zig.fmtEscapes(&val_array)});
-            const node = try rp.c.arena.create(ast.Node.OneToken);
-            node.* = .{
-                .base = .{ .tag = .CharLiteral },
-                .token = token,
-            };
-            break :blk &node.base;
-        },
-        .Wide, .UTF16, .UTF32 => blk: {
-            const token = try appendTokenFmt(rp.c, .CharLiteral, "'\\u{{{x}}}'", .{val});
-            const node = try rp.c.arena.create(ast.Node.OneToken);
-            node.* = .{
-                .base = .{ .tag = .CharLiteral },
-                .token = token,
-            };
-            break :blk &node.base;
-        },
-    };
+    const narrow = kind == .Ascii or kind == .UTF8;
+    // C has a somewhat obscure feature called multi-character character constant
+    // e.g. 'abcd'
+    const int_lit_node = if (kind == .Ascii and val > 255)
+        try transCreateNodeInt(rp.c, val)
+    else
+        try transCreateCharLitNode(rp.c, narrow, val);
+
     if (suppress_as == .no_as) {
         return maybeSuppressResult(rp, scope, result_used, int_lit_node);
     }
@@ -3891,6 +3988,38 @@ fn addTopLevelDecl(c: *Context, name: []const u8, decl_node: *ast.Node) !void {
     _ = try c.global_scope.sym_table.put(name, decl_node);
 }
 
+/// Translate the type of a variable that has an initializer. For an
+/// incomplete array the length is taken from the initializer: a string
+/// literal gives its length plus one for the null terminator, and an
+/// initializer list gives its number of inits. Every other combination is
+/// forwarded unchanged to `transQualType`.
+fn transQualTypeInitialized(
+    rp: RestorePoint,
+    qt: clang.QualType,
+    decl_init: *const clang.Expr,
+    source_loc: clang.SourceLocation,
+) TypeError!*ast.Node {
+    const ty = qt.getTypePtr();
+    // Only incomplete arrays need the initializer to determine their type.
+    if (ty.getTypeClass() != .IncompleteArray) return transQualType(rp, qt, source_loc);
+
+    const elem_ty = @ptrCast(*const clang.IncompleteArrayType, ty).getElementType().getTypePtr();
+
+    switch (decl_init.getStmtClass()) {
+        .StringLiteralClass => {
+            const literal = @ptrCast(*const clang.StringLiteral, decl_init);
+            const len_with_nul = literal.getLength() + 1; // +1 for null terminator
+            return transCreateNodeArrayType(rp, source_loc, elem_ty, @intCast(usize, len_with_nul));
+        },
+        .InitListExprClass => {
+            const init_list = @ptrCast(*const clang.InitListExpr, decl_init);
+            return transCreateNodeArrayType(rp, source_loc, elem_ty, init_list.getNumInits());
+        },
+        // No size is derived from any other kind of initializer.
+        else => return transQualType(rp, qt, source_loc),
+    }
+}
+
 fn transQualType(rp: RestorePoint, qt: clang.QualType, source_loc: clang.SourceLocation) TypeError!*ast.Node {
     return transType(rp, qt.getTypePtr(), source_loc);
 }
src/zig_clang.cpp
@@ -2504,6 +2504,21 @@ enum ZigClangStringLiteral_StringKind ZigClangStringLiteral_getKind(const struct
     return (ZigClangStringLiteral_StringKind)casted->getKind();
 }
 
+// Returns the code unit at index `i`, as reported by clang::StringLiteral::getCodeUnit.
+uint32_t ZigClangStringLiteral_getCodeUnit(const struct ZigClangStringLiteral *self, size_t i) {
+    return reinterpret_cast<const clang::StringLiteral *>(self)->getCodeUnit(i);
+}
+
+// Returns the value of clang::StringLiteral::getLength for `self`.
+unsigned ZigClangStringLiteral_getLength(const struct ZigClangStringLiteral *self) {
+    return reinterpret_cast<const clang::StringLiteral *>(self)->getLength();
+}
+
+// Returns the value of clang::StringLiteral::getCharByteWidth for `self`.
+unsigned ZigClangStringLiteral_getCharByteWidth(const struct ZigClangStringLiteral *self) {
+    return reinterpret_cast<const clang::StringLiteral *>(self)->getCharByteWidth();
+}
+
 const char *ZigClangStringLiteral_getString_bytes_begin_size(const struct ZigClangStringLiteral *self, size_t *len) {
     auto casted = reinterpret_cast<const clang::StringLiteral *>(self);
     llvm::StringRef str_ref = casted->getString();
src/zig_clang.h
@@ -1126,6 +1126,10 @@ ZIG_EXTERN_C unsigned ZigClangAPFloat_convertToHexString(const struct ZigClangAP
 ZIG_EXTERN_C double ZigClangFloatingLiteral_getValueAsApproximateDouble(const ZigClangFloatingLiteral *self);
 
 ZIG_EXTERN_C enum ZigClangStringLiteral_StringKind ZigClangStringLiteral_getKind(const struct ZigClangStringLiteral *self);
+ZIG_EXTERN_C uint32_t ZigClangStringLiteral_getCodeUnit(const struct ZigClangStringLiteral *self, size_t i);
+ZIG_EXTERN_C unsigned ZigClangStringLiteral_getLength(const struct ZigClangStringLiteral *self);
+ZIG_EXTERN_C unsigned ZigClangStringLiteral_getCharByteWidth(const struct ZigClangStringLiteral *self);
+
 ZIG_EXTERN_C const char *ZigClangStringLiteral_getString_bytes_begin_size(const struct ZigClangStringLiteral *self,
         size_t *len);
 
test/run_translated_c.zig
@@ -746,4 +746,52 @@ pub fn addCases(cases: *tests.RunTranslatedCContext) void {
         \\    return 0;
         \\}
     , "1 2" ++ nl);
+
+    cases.add("multi-character character constant",
+        \\#include <stdlib.h>
+        \\int main(void) {
+        \\    int foo = 'abcd';
+        \\    switch (foo) {
+        \\        case 'abcd': break;
+        \\        default: abort();
+        \\    }
+        \\    return 0;
+        \\}
+    , "");
+
+    cases.add("Array initializers (string literals, incomplete arrays)",
+        \\#include <stdlib.h>
+        \\#include <string.h>
+        \\extern int foo[];
+        \\int global_arr[] = {1, 2, 3};
+        \\char global_string[] = "hello";
+        \\int main(int argc, char *argv[]) {
+        \\    if (global_arr[2] != 3) abort();
+        \\    if (strlen(global_string) != 5) abort();
+        \\    const char *const_str = "hello";
+        \\    if (strcmp(const_str, "hello") != 0) abort();
+        \\    char empty_str[] = "";
+        \\    if (strlen(empty_str) != 0) abort();
+        \\    char hello[] = "hello";
+        \\    if (strlen(hello) != 5 || sizeof(hello) != 6) abort();
+        \\    int empty[] = {};
+        \\    if (sizeof(empty) != 0) abort();
+        \\    int bar[] = {42};
+        \\    if (bar[0] != 42) abort();
+        \\    bar[0] = 43;
+        \\    if (bar[0] != 43) abort();
+        \\    int baz[] = {1, [42] = 123, 456};
+        \\    if (baz[42] != 123 || baz[43] != 456) abort();
+        \\    if (sizeof(baz) != sizeof(int) * 44) abort();
+        \\    const char *const names[] = {"first", "second", "third"};
+        \\    if (strcmp(names[2], "third") != 0) abort();
+        \\    char catted_str[] = "abc" "def";
+        \\    if (strlen(catted_str) != 6 || sizeof(catted_str) != 7) abort();
+        \\    char catted_trunc_str[2] = "abc" "def";
+        \\    if (sizeof(catted_trunc_str) != 2 || catted_trunc_str[0] != 'a' || catted_trunc_str[1] != 'b') abort();
+        \\    char big_array_utf8lit[10] = "💯";
+        \\    if (strcmp(big_array_utf8lit, "💯") != 0 || big_array_utf8lit[9] != 0) abort();
+        \\    return 0;
+        \\}
+    , "");
 }
test/translate_c.zig
@@ -539,7 +539,14 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\    static const char v2[] = "2.2.2";
         \\}
     , &[_][]const u8{
-        \\const v2: [*c]const u8 = "2.2.2";
+        \\const v2: [6]u8 = [6]u8{
+        \\    '2',
+        \\    '.',
+        \\    '2',
+        \\    '.',
+        \\    '2',
+        \\    0,
+        \\};
         \\pub export fn foo() void {
         \\    _ = v2;
         \\}
@@ -1395,9 +1402,30 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\static char arr1[] = "hello";
         \\char arr2[] = "hello";
     , &[_][]const u8{
-        \\pub export var arr0: [*c]u8 = "hello";
-        \\pub var arr1: [*c]u8 = "hello";
-        \\pub export var arr2: [*c]u8 = "hello";
+        \\pub export var arr0: [6]u8 = [6]u8{
+        \\    'h',
+        \\    'e',
+        \\    'l',
+        \\    'l',
+        \\    'o',
+        \\    0,
+        \\};
+        \\pub var arr1: [6]u8 = [6]u8{
+        \\    'h',
+        \\    'e',
+        \\    'l',
+        \\    'l',
+        \\    'o',
+        \\    0,
+        \\};
+        \\pub export var arr2: [6]u8 = [6]u8{
+        \\    'h',
+        \\    'e',
+        \\    'l',
+        \\    'l',
+        \\    'o',
+        \\    0,
+        \\};
     });
 
     cases.add("array initializer expr",