Commit b54514d9dd

Evan Haas <evan@lagerdata.com>
2021-03-18 13:41:04
translate-c: Use [N:0] arrays when initializer is a string literal (#8264)
* translate-c: Use [N:0] arrays when initializer is a string literal. Translate incomplete arrays as [N:0] when initialized by a string literal; this preserves a bit more of the type information from the original C program. Fixes #8215.
1 parent 75a7abb
Changed files (3)
src/translate_c/ast.zig
@@ -40,6 +40,8 @@ pub const Node = extern union {
         string_literal,
         char_literal,
         enum_literal,
+        /// "string"[0..end]
+        string_slice,
         identifier,
         @"if",
         /// if (!operand) break;
@@ -176,6 +178,7 @@ pub const Node = extern union {
         c_pointer,
         single_pointer,
         array_type,
+        null_sentinel_array_type,
 
         /// @import("std").meta.sizeof(operand)
         std_meta_sizeof,
@@ -334,7 +337,7 @@ pub const Node = extern union {
                 .std_meta_promoteIntLiteral => Payload.PromoteIntLiteral,
                 .block => Payload.Block,
                 .c_pointer, .single_pointer => Payload.Pointer,
-                .array_type => Payload.Array,
+                .array_type, .null_sentinel_array_type => Payload.Array,
                 .arg_redecl, .alias, .fail_decl => Payload.ArgRedecl,
                 .log2_int_type => Payload.Log2IntType,
                 .var_simple, .pub_var_simple => Payload.SimpleVarDecl,
@@ -342,6 +345,7 @@ pub const Node = extern union {
                 .array_filler => Payload.ArrayFiller,
                 .pub_inline_fn => Payload.PubInlineFn,
                 .field_access => Payload.FieldAccess,
+                .string_slice => Payload.StringSlice,
             };
         }
 
@@ -584,10 +588,12 @@ pub const Payload = struct {
 
     pub const Array = struct {
         base: Payload,
-        data: struct {
+        data: ArrayTypeInfo,
+
+        pub const ArrayTypeInfo = struct {
             elem_type: Node,
             len: usize,
-        },
+        };
     };
 
     pub const Pointer = struct {
@@ -664,6 +670,14 @@ pub const Payload = struct {
             radix: Node,
         },
     };
+
+    pub const StringSlice = struct { // payload for the `.string_slice` tag, rendered as `string[0..end]`
+        base: Payload,
+        data: struct {
+            string: Node, // node for the expression being sliced
+            end: usize, // upper bound printed after `..`; the lower bound is always rendered as 0
+        },
+    };
 };
 
 /// Converts the nodes into a Zig ast.
@@ -1015,6 +1029,36 @@ fn renderNode(c: *Context, node: Node) Allocator.Error!NodeIndex {
                 .data = undefined,
             });
         },
+        .string_slice => {
+            const payload = node.castTag(.string_slice).?.data;
+
+            const string = try renderNode(c, payload.string);
+            const l_bracket = try c.addToken(.l_bracket, "[");
+            const start = try c.addNode(.{
+                .tag = .integer_literal,
+                .main_token = try c.addToken(.integer_literal, "0"),
+                .data = undefined,
+            });
+            _ = try c.addToken(.ellipsis2, "..");
+            const end = try c.addNode(.{
+                .tag = .integer_literal,
+                .main_token = try c.addTokenFmt(.integer_literal, "{d}", .{payload.end}),
+                .data = undefined,
+            });
+            _ = try c.addToken(.r_bracket, "]");
+
+            return c.addNode(.{
+                .tag = .slice,
+                .main_token = l_bracket,
+                .data = .{
+                    .lhs = string,
+                    .rhs = try c.addExtra(std.zig.ast.Node.Slice{
+                        .start = start,
+                        .end = end,
+                    }),
+                },
+            });
+        },
         .fail_decl => {
             const payload = node.castTag(.fail_decl).?.data;
             // pub const name = @compileError(msg);
@@ -1581,6 +1625,10 @@ fn renderNode(c: *Context, node: Node) Allocator.Error!NodeIndex {
             const payload = node.castTag(.array_type).?.data;
             return renderArrayType(c, payload.len, payload.elem_type);
         },
+        .null_sentinel_array_type => {
+            const payload = node.castTag(.null_sentinel_array_type).?.data;
+            return renderNullSentinelArrayType(c, payload.len, payload.elem_type);
+        },
         .array_filler => {
             const payload = node.castTag(.array_filler).?.data;
 
@@ -1946,6 +1994,36 @@ fn renderArrayType(c: *Context, len: usize, elem_type: Node) !NodeIndex {
     });
 }
 
+/// Renders the type expression `[len:0]elem_type` as an `array_type_sentinel` node.
+/// The calls below add tokens in the same order as they appear in the rendered
+/// text (`[`, length, `:`, `0`, `]`, element type); keep that order when editing.
+fn renderNullSentinelArrayType(c: *Context, len: usize, elem_type: Node) !NodeIndex {
+    const open_tok = try c.addToken(.l_bracket, "[");
+
+    // Array length, printed in decimal.
+    const len_tok = try c.addTokenFmt(.integer_literal, "{d}", .{len});
+    const len_node = try c.addNode(.{
+        .tag = .integer_literal,
+        .main_token = len_tok,
+        .data = undefined,
+    });
+
+    // The sentinel is always the literal `0`.
+    _ = try c.addToken(.colon, ":");
+    const zero_tok = try c.addToken(.integer_literal, "0");
+    const sentinel_node = try c.addNode(.{
+        .tag = .integer_literal,
+        .main_token = zero_tok,
+        .data = undefined,
+    });
+
+    _ = try c.addToken(.r_bracket, "]");
+    const child_node = try renderNode(c, elem_type);
+
+    const extra = try c.addExtra(std.zig.ast.Node.ArrayTypeSentinel{
+        .sentinel = sentinel_node,
+        .elem_type = child_node,
+    });
+    return c.addNode(.{
+        .tag = .array_type_sentinel,
+        .main_token = open_tok,
+        .data = .{
+            .lhs = len_node,
+            .rhs = extra,
+        },
+    });
+}
+
 fn addSemicolonIfNeeded(c: *Context, node: Node) !void {
     switch (node.tag()) {
         .warning => unreachable,
@@ -2014,6 +2092,7 @@ fn renderNodeGrouped(c: *Context, node: Node) !NodeIndex {
         .integer_literal,
         .float_literal,
         .string_literal,
+        .string_slice,
         .char_literal,
         .enum_literal,
         .identifier,
@@ -2035,6 +2114,7 @@ fn renderNodeGrouped(c: *Context, node: Node) !NodeIndex {
         .func,
         .call,
         .array_type,
+        .null_sentinel_array_type,
         .bool_to_int,
         .div_exact,
         .byte_offset_of,
src/translate_c.zig
@@ -636,7 +636,7 @@ fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]co
     if (has_init) trans_init: {
         if (decl_init) |expr| {
             const node_or_error = if (expr.getStmtClass() == .StringLiteralClass)
-                transStringLiteralAsArray(c, scope, @ptrCast(*const clang.StringLiteral, expr), zigArraySize(c, type_node) catch 0)
+                transStringLiteralInitializer(c, scope, @ptrCast(*const clang.StringLiteral, expr), type_node)
             else
                 transExprCoercing(c, scope, expr, .used);
             init_node = node_or_error catch |err| switch (err) {
@@ -1412,7 +1412,7 @@ fn transDeclStmtOne(
 
             var init_node = if (decl_init) |expr|
                 if (expr.getStmtClass() == .StringLiteralClass)
-                    try transStringLiteralAsArray(c, scope, @ptrCast(*const clang.StringLiteral, expr), try zigArraySize(c, type_node))
+                    try transStringLiteralInitializer(c, scope, @ptrCast(*const clang.StringLiteral, expr), type_node)
                 else
                     try transExprCoercing(c, scope, expr, .used)
             else
@@ -1758,6 +1758,20 @@ fn transReturnStmt(
     return Tag.@"return".create(c.arena, rhs);
 }
 
+/// Translates a narrow string literal into a `string_literal` node whose text is
+/// the literal's bytes, escaped with `std.zig.fmtEscapes` and wrapped in double
+/// quotes. The node is then passed through `maybeSuppressResult` with `result_used`.
+fn transNarrowStringLiteral(
+    c: *Context,
+    scope: *Scope,
+    stmt: *const clang.StringLiteral,
+    result_used: ResultUsed,
+) TransError!Node {
+    // `byte_count` is filled in through the out-pointer by the call below.
+    var byte_count: usize = undefined;
+    const raw = stmt.getString_bytes_begin_size(&byte_count);
+    const escaped = std.zig.fmtEscapes(raw[0..byte_count]);
+
+    const quoted = try std.fmt.allocPrint(c.arena, "\"{}\"", .{escaped});
+    const literal = try Tag.string_literal.create(c.arena, quoted);
+    return maybeSuppressResult(c, scope, result_used, literal);
+}
+
 fn transStringLiteral(
     c: *Context,
     scope: *Scope,
@@ -1766,19 +1780,14 @@ fn transStringLiteral(
 ) TransError!Node {
     const kind = stmt.getKind();
     switch (kind) {
-        .Ascii, .UTF8 => {
-            var len: usize = undefined;
-            const bytes_ptr = stmt.getString_bytes_begin_size(&len);
-
-            const str = try std.fmt.allocPrint(c.arena, "\"{}\"", .{std.zig.fmtEscapes(bytes_ptr[0..len])});
-            const node = try Tag.string_literal.create(c.arena, str);
-            return maybeSuppressResult(c, scope, result_used, node);
-        },
+        .Ascii, .UTF8 => return transNarrowStringLiteral(c, scope, stmt, result_used),
         .UTF16, .UTF32, .Wide => {
             const str_type = @tagName(stmt.getKind());
             const name = try std.fmt.allocPrint(c.arena, "zig.{s}_string_{d}", .{ str_type, c.getMangle() });
-            const lit_array = try transStringLiteralAsArray(c, scope, stmt, stmt.getLength() + 1);
 
+            const expr_base = @ptrCast(*const clang.Expr, stmt);
+            const array_type = try transQualTypeInitialized(c, scope, expr_base.getType(), expr_base, expr_base.getBeginLoc());
+            const lit_array = try transStringLiteralInitializer(c, scope, stmt, array_type);
             const decl = try Tag.var_simple.create(c.arena, .{ .name = name, .init = lit_array });
             try scope.appendNode(decl);
             const node = try Tag.identifier.create(c.arena, name);
@@ -1787,52 +1796,67 @@ fn transStringLiteral(
     }
 }
 
-/// Parse the size of an array back out from an ast Node.
-fn zigArraySize(c: *Context, node: Node) TransError!usize {
-    if (node.castTag(.array_type)) |array| {
-        return array.data.len;
-    }
-    return error.UnsupportedTranslation;
+fn getArrayPayload(array_type: Node) ast.Payload.Array.ArrayTypeInfo {
+    return (array_type.castTag(.array_type) orelse array_type.castTag(.null_sentinel_array_type).?).data;
 }
 
-/// Translate a string literal to an array of integers. Used when an
-/// array is initialized from a string literal. `array_size` is the
-/// size of the array being initialized. If the string literal is larger
-/// than the array, truncate the string. If the array is larger than the
-/// string literal, pad the array with 0's
-fn transStringLiteralAsArray(
+/// Translate a string literal that is initializing an array. In general, narrow string
+/// literals become `"<string>".*`, or `"<string>"[0..<size>].*` when the string length
+/// differs from the array length. Wide string literals become an array of integers.
+/// Zero-fillers pad out the array to the appropriate length, if necessary.
+fn transStringLiteralInitializer(
     c: *Context,
     scope: *Scope,
     stmt: *const clang.StringLiteral,
-    array_size: usize,
+    array_type: Node,
 ) TransError!Node {
-    if (array_size == 0) return error.UnsupportedType;
+    assert(array_type.tag() == .array_type or array_type.tag() == .null_sentinel_array_type);
+
+    const is_narrow = stmt.getKind() == .Ascii or stmt.getKind() == .UTF8;
 
     const str_length = stmt.getLength();
+    const payload = getArrayPayload(array_type);
+    const array_size = payload.len;
+    const elem_type = payload.elem_type;
+
+    if (array_size == 0) return Tag.empty_array.create(c.arena, elem_type);
+
+    const num_inits = math.min(str_length, array_size);
+    const init_node = if (num_inits > 0) blk: {
+        if (is_narrow) {
+            // "string literal".* or "string literal"[0..num_inits].*
+            var str = try transNarrowStringLiteral(c, scope, stmt, .used);
+            if (str_length != array_size) str = try Tag.string_slice.create(c.arena, .{ .string = str, .end = num_inits });
+            break :blk try Tag.deref.create(c.arena, str);
+        } else {
+            const init_list = try c.arena.alloc(Node, num_inits);
+            var i: c_uint = 0;
+            while (i < num_inits) : (i += 1) {
+                init_list[i] = try transCreateCharLitNode(c, false, stmt.getCodeUnit(i));
+            }
+            const init_args = .{ .len = num_inits, .elem_type = elem_type };
+            const init_array_type = try if (array_type.tag() == .array_type) Tag.array_type.create(c.arena, init_args) else Tag.null_sentinel_array_type.create(c.arena, init_args);
+            break :blk try Tag.array_init.create(c.arena, .{
+                .cond = init_array_type,
+                .cases = init_list,
+            });
+        }
+    } else null;
 
-    const expr_base = @ptrCast(*const clang.Expr, stmt);
-    const ty = expr_base.getType().getTypePtr();
-    const const_arr_ty = @ptrCast(*const clang.ConstantArrayType, ty);
+    if (num_inits == array_size) return init_node.?; // init_node is only null if num_inits == 0; but if num_inits == array_size == 0 we've already returned
+    assert(array_size > str_length); // If array_size <= str_length, `num_inits == array_size` and we've already returned.
 
-    const elem_type = try transQualType(c, scope, const_arr_ty.getElementType(), expr_base.getBeginLoc());
-    const arr_type = try Tag.array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_type });
-    const init_list = try c.arena.alloc(Node, array_size);
+    const filler_node = try Tag.array_filler.create(c.arena, .{
+        .type = elem_type,
+        .filler = Tag.zero_literal.init(),
+        .count = array_size - str_length,
+    });
 
-    var i: c_uint = 0;
-    const kind = stmt.getKind();
-    const narrow = kind == .Ascii or kind == .UTF8;
-    while (i < str_length and i < array_size) : (i += 1) {
-        const code_unit = stmt.getCodeUnit(i);
-        init_list[i] = try transCreateCharLitNode(c, narrow, code_unit);
-    }
-    while (i < array_size) : (i += 1) {
-        init_list[i] = try transCreateNodeNumber(c, 0, .int);
+    if (init_node) |some| {
+        return Tag.array_cat.create(c.arena, .{ .lhs = some, .rhs = filler_node });
+    } else {
+        return filler_node;
     }
-
-    return Tag.array_init.create(c.arena, .{
-        .cond = arr_type,
-        .cases = init_list,
-    });
 }
 
 /// determine whether `stmt` is a "pointer subtraction expression" - a subtraction where
@@ -3342,9 +3366,8 @@ fn addTopLevelDecl(c: *Context, name: []const u8, decl_node: Node) !void {
     try c.global_scope.nodes.append(decl_node);
 }
 
-/// Translate a qual type for a variable with an initializer. The initializer
-/// only matters for incomplete arrays, since the size of the array is determined
-/// by the size of the initializer
+/// Translate a qualtype for a variable with an initializer. This only matters
+/// for incomplete arrays, since the initializer determines the size of the array.
 fn transQualTypeInitialized(
     c: *Context,
     scope: *Scope,
@@ -3360,9 +3383,14 @@ fn transQualTypeInitialized(
         switch (decl_init.getStmtClass()) {
             .StringLiteralClass => {
                 const string_lit = @ptrCast(*const clang.StringLiteral, decl_init);
-                const string_lit_size = string_lit.getLength() + 1; // +1 for null terminator
+                const string_lit_size = string_lit.getLength();
                 const array_size = @intCast(usize, string_lit_size);
-                return Tag.array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_ty });
+
+                // incomplete array initialized with empty string, will be translated as [1]T{0}
+                // see https://github.com/ziglang/zig/issues/8256
+                if (array_size == 0) return Tag.array_type.create(c.arena, .{ .len = 1, .elem_type = elem_ty });
+
+                return Tag.null_sentinel_array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_ty });
             },
             .InitListExprClass => {
                 const init_expr = @ptrCast(*const clang.InitListExpr, decl_init);
test/translate_c.zig
@@ -745,14 +745,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\    static const char v2[] = "2.2.2";
         \\}
     , &[_][]const u8{
-        \\const v2: [6]u8 = [6]u8{
-        \\    '2',
-        \\    '.',
-        \\    '2',
-        \\    '.',
-        \\    '2',
-        \\    0,
-        \\};
+        \\const v2: [5:0]u8 = "2.2.2".*;
         \\pub export fn foo() void {}
     });
 
@@ -1600,30 +1593,9 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
         \\static char arr1[] = "hello";
         \\char arr2[] = "hello";
     , &[_][]const u8{
-        \\pub export var arr0: [6]u8 = [6]u8{
-        \\    'h',
-        \\    'e',
-        \\    'l',
-        \\    'l',
-        \\    'o',
-        \\    0,
-        \\};
-        \\pub var arr1: [6]u8 = [6]u8{
-        \\    'h',
-        \\    'e',
-        \\    'l',
-        \\    'l',
-        \\    'o',
-        \\    0,
-        \\};
-        \\pub export var arr2: [6]u8 = [6]u8{
-        \\    'h',
-        \\    'e',
-        \\    'l',
-        \\    'l',
-        \\    'o',
-        \\    0,
-        \\};
+        \\pub export var arr0: [5:0]u8 = "hello".*;
+        \\pub var arr1: [5:0]u8 = "hello".*;
+        \\pub export var arr2: [5:0]u8 = "hello".*;
     });
 
     cases.add("array initializer expr",
@@ -3425,4 +3397,49 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
     , &[_][]const u8{
         \\pub const FOO = @compileError("TODO implement function '__builtin_alloca_with_align' in std.c.builtins");
     });
+
+    cases.add("null sentinel arrays when initialized from string literal. Issue #8256",
+        \\#include <stdint.h>
+        \\char zero[0] = "abc";
+        \\uint32_t zero_w[0] = U"💯💯💯";
+        \\char empty_incomplete[] = "";
+        \\uint32_t empty_incomplete_w[] = U"";
+        \\char empty_constant[100] = "";
+        \\uint32_t empty_constant_w[100] = U"";
+        \\char incomplete[] = "abc";
+        \\uint32_t incomplete_w[] = U"💯💯💯";
+        \\char truncated[1] = "abc";
+        \\uint32_t truncated_w[1] = U"💯💯💯";
+        \\char extend[5] = "a";
+        \\uint32_t extend_w[5] = U"💯";
+        \\char no_null[3] = "abc";
+        \\uint32_t no_null_w[3] = U"💯💯💯";
+    , &[_][]const u8{
+        \\pub export var zero: [0]u8 = [0]u8{};
+        \\pub export var zero_w: [0]u32 = [0]u32{};
+        \\pub export var empty_incomplete: [1]u8 = [1]u8{0} ** 1;
+        \\pub export var empty_incomplete_w: [1]u32 = [1]u32{0} ** 1;
+        \\pub export var empty_constant: [100]u8 = [1]u8{0} ** 100;
+        \\pub export var empty_constant_w: [100]u32 = [1]u32{0} ** 100;
+        \\pub export var incomplete: [3:0]u8 = "abc".*;
+        \\pub export var incomplete_w: [3:0]u32 = [3:0]u32{
+        \\    '\u{1f4af}',
+        \\    '\u{1f4af}',
+        \\    '\u{1f4af}',
+        \\};
+        \\pub export var truncated: [1]u8 = "abc"[0..1].*;
+        \\pub export var truncated_w: [1]u32 = [1]u32{
+        \\    '\u{1f4af}',
+        \\};
+        \\pub export var extend: [5]u8 = "a"[0..1].* ++ [1]u8{0} ** 4;
+        \\pub export var extend_w: [5]u32 = [1]u32{
+        \\    '\u{1f4af}',
+        \\} ++ [1]u32{0} ** 4;
+        \\pub export var no_null: [3]u8 = "abc".*;
+        \\pub export var no_null_w: [3]u32 = [3]u32{
+        \\    '\u{1f4af}',
+        \\    '\u{1f4af}',
+        \\    '\u{1f4af}',
+        \\};
+    });
 }