Commit b54514d9dd
Changed files (3)
src
translate_c
test
src/translate_c/ast.zig
@@ -40,6 +40,8 @@ pub const Node = extern union {
string_literal,
char_literal,
enum_literal,
+ /// "string"[0..end]
+ string_slice,
identifier,
@"if",
/// if (!operand) break;
@@ -176,6 +178,7 @@ pub const Node = extern union {
c_pointer,
single_pointer,
array_type,
+ null_sentinel_array_type,
/// @import("std").meta.sizeof(operand)
std_meta_sizeof,
@@ -334,7 +337,7 @@ pub const Node = extern union {
.std_meta_promoteIntLiteral => Payload.PromoteIntLiteral,
.block => Payload.Block,
.c_pointer, .single_pointer => Payload.Pointer,
- .array_type => Payload.Array,
+ .array_type, .null_sentinel_array_type => Payload.Array,
.arg_redecl, .alias, .fail_decl => Payload.ArgRedecl,
.log2_int_type => Payload.Log2IntType,
.var_simple, .pub_var_simple => Payload.SimpleVarDecl,
@@ -342,6 +345,7 @@ pub const Node = extern union {
.array_filler => Payload.ArrayFiller,
.pub_inline_fn => Payload.PubInlineFn,
.field_access => Payload.FieldAccess,
+ .string_slice => Payload.StringSlice,
};
}
@@ -584,10 +588,12 @@ pub const Payload = struct {
pub const Array = struct { // payload shared by the .array_type and .null_sentinel_array_type tags
base: Payload,
- data: struct {
+ data: ArrayTypeInfo,
+
+ pub const ArrayTypeInfo = struct { // named rather than anonymous so it can be used as a type outside this struct
elem_type: Node, // node describing the type of one element
len: usize, // length written inside the brackets of the rendered array type
- },
+ };
};
pub const Pointer = struct {
@@ -664,6 +670,14 @@ pub const Payload = struct {
radix: Node,
},
};
+
+ pub const StringSlice = struct { // payload for the .string_slice tag, rendered as string[0..end]
+ base: Payload,
+ data: struct {
+ string: Node, // expression that gets sliced
+ end: usize, // upper bound written after the `..`
+ },
+ };
};
/// Converts the nodes into a Zig ast.
@@ -1015,6 +1029,36 @@ fn renderNode(c: *Context, node: Node) Allocator.Error!NodeIndex {
.data = undefined,
});
},
+ .string_slice => { // emits `<string>[0..<end>]` as a .slice node
+ const payload = node.castTag(.string_slice).?.data;
+
+ const string = try renderNode(c, payload.string); // the sliced operand is rendered before the bracket tokens
+ const l_bracket = try c.addToken(.l_bracket, "[");
+ const start = try c.addNode(.{ // lower bound is always the literal 0
+ .tag = .integer_literal,
+ .main_token = try c.addToken(.integer_literal, "0"),
+ .data = undefined,
+ });
+ _ = try c.addToken(.ellipsis2, "..");
+ const end = try c.addNode(.{ // upper bound is payload.end printed in decimal
+ .tag = .integer_literal,
+ .main_token = try c.addTokenFmt(.integer_literal, "{d}", .{payload.end}),
+ .data = undefined,
+ });
+ _ = try c.addToken(.r_bracket, "]");
+
+ return c.addNode(.{
+ .tag = .slice,
+ .main_token = l_bracket, // the `[` token is the slice node's main token
+ .data = .{
+ .lhs = string,
+ .rhs = try c.addExtra(std.zig.ast.Node.Slice{ // start/end node indices are stored as extra data
+ .start = start,
+ .end = end,
+ }),
+ },
+ });
+ },
.fail_decl => {
const payload = node.castTag(.fail_decl).?.data;
// pub const name = @compileError(msg);
@@ -1581,6 +1625,10 @@ fn renderNode(c: *Context, node: Node) Allocator.Error!NodeIndex {
const payload = node.castTag(.array_type).?.data;
return renderArrayType(c, payload.len, payload.elem_type);
},
+ .null_sentinel_array_type => {
+ const payload = node.castTag(.null_sentinel_array_type).?.data;
+ return renderNullSentinelArrayType(c, payload.len, payload.elem_type);
+ },
.array_filler => {
const payload = node.castTag(.array_filler).?.data;
@@ -1946,6 +1994,36 @@ fn renderArrayType(c: *Context, len: usize, elem_type: Node) !NodeIndex {
});
}
+/// Renders the array type `[len:0]elem_type`, i.e. an array whose sentinel is
+/// the integer literal 0. Returns the index of the `array_type_sentinel` node.
+/// Tokens are added in the same order as they appear in the rendered text.
+fn renderNullSentinelArrayType(c: *Context, len: usize, elem_type: Node) !NodeIndex {
+ // `[` is kept because it becomes the main token of the resulting node.
+ const l_bracket = try c.addToken(.l_bracket, "[");
+ const len_expr = try c.addNode(.{
+ .tag = .integer_literal,
+ .main_token = try c.addTokenFmt(.integer_literal, "{d}", .{len}),
+ .data = undefined,
+ });
+ _ = try c.addToken(.colon, ":");
+
+ // The sentinel is always the literal 0.
+ const sentinel_expr = try c.addNode(.{
+ .tag = .integer_literal,
+ .main_token = try c.addToken(.integer_literal, "0"),
+ .data = undefined,
+ });
+
+ _ = try c.addToken(.r_bracket, "]");
+ // The element type is rendered after the closing bracket.
+ const elem_type_expr = try renderNode(c, elem_type);
+ return c.addNode(.{
+ .tag = .array_type_sentinel,
+ .main_token = l_bracket,
+ .data = .{
+ .lhs = len_expr,
+ .rhs = try c.addExtra(std.zig.ast.Node.ArrayTypeSentinel{
+ .sentinel = sentinel_expr,
+ .elem_type = elem_type_expr,
+ }),
+ },
+ });
+}
+
fn addSemicolonIfNeeded(c: *Context, node: Node) !void {
switch (node.tag()) {
.warning => unreachable,
@@ -2014,6 +2092,7 @@ fn renderNodeGrouped(c: *Context, node: Node) !NodeIndex {
.integer_literal,
.float_literal,
.string_literal,
+ .string_slice,
.char_literal,
.enum_literal,
.identifier,
@@ -2035,6 +2114,7 @@ fn renderNodeGrouped(c: *Context, node: Node) !NodeIndex {
.func,
.call,
.array_type,
+ .null_sentinel_array_type,
.bool_to_int,
.div_exact,
.byte_offset_of,
src/translate_c.zig
@@ -636,7 +636,7 @@ fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]co
if (has_init) trans_init: {
if (decl_init) |expr| {
const node_or_error = if (expr.getStmtClass() == .StringLiteralClass)
- transStringLiteralAsArray(c, scope, @ptrCast(*const clang.StringLiteral, expr), zigArraySize(c, type_node) catch 0)
+ transStringLiteralInitializer(c, scope, @ptrCast(*const clang.StringLiteral, expr), type_node)
else
transExprCoercing(c, scope, expr, .used);
init_node = node_or_error catch |err| switch (err) {
@@ -1412,7 +1412,7 @@ fn transDeclStmtOne(
var init_node = if (decl_init) |expr|
if (expr.getStmtClass() == .StringLiteralClass)
- try transStringLiteralAsArray(c, scope, @ptrCast(*const clang.StringLiteral, expr), try zigArraySize(c, type_node))
+ try transStringLiteralInitializer(c, scope, @ptrCast(*const clang.StringLiteral, expr), type_node)
else
try transExprCoercing(c, scope, expr, .used)
else
@@ -1758,6 +1758,20 @@ fn transReturnStmt(
return Tag.@"return".create(c.arena, rhs);
}
+/// Translate a narrow string literal into a Zig string literal node. The bytes
+/// of `stmt` are escaped with `std.zig.fmtEscapes` and wrapped in double quotes;
+/// the resulting node is then passed through `maybeSuppressResult`.
+fn transNarrowStringLiteral(
+ c: *Context,
+ scope: *Scope,
+ stmt: *const clang.StringLiteral,
+ result_used: ResultUsed,
+) TransError!Node {
+ var byte_count: usize = undefined;
+ const first_byte = stmt.getString_bytes_begin_size(&byte_count);
+ const raw_bytes = first_byte[0..byte_count];
+
+ const quoted = try std.fmt.allocPrint(c.arena, "\"{}\"", .{std.zig.fmtEscapes(raw_bytes)});
+ const literal = try Tag.string_literal.create(c.arena, quoted);
+ return maybeSuppressResult(c, scope, result_used, literal);
+}
+
fn transStringLiteral(
c: *Context,
scope: *Scope,
@@ -1766,19 +1780,14 @@ fn transStringLiteral(
) TransError!Node {
const kind = stmt.getKind();
switch (kind) {
- .Ascii, .UTF8 => {
- var len: usize = undefined;
- const bytes_ptr = stmt.getString_bytes_begin_size(&len);
-
- const str = try std.fmt.allocPrint(c.arena, "\"{}\"", .{std.zig.fmtEscapes(bytes_ptr[0..len])});
- const node = try Tag.string_literal.create(c.arena, str);
- return maybeSuppressResult(c, scope, result_used, node);
- },
+ .Ascii, .UTF8 => return transNarrowStringLiteral(c, scope, stmt, result_used),
.UTF16, .UTF32, .Wide => {
const str_type = @tagName(stmt.getKind());
const name = try std.fmt.allocPrint(c.arena, "zig.{s}_string_{d}", .{ str_type, c.getMangle() });
- const lit_array = try transStringLiteralAsArray(c, scope, stmt, stmt.getLength() + 1);
+ const expr_base = @ptrCast(*const clang.Expr, stmt);
+ const array_type = try transQualTypeInitialized(c, scope, expr_base.getType(), expr_base, expr_base.getBeginLoc());
+ const lit_array = try transStringLiteralInitializer(c, scope, stmt, array_type);
const decl = try Tag.var_simple.create(c.arena, .{ .name = name, .init = lit_array });
try scope.appendNode(decl);
const node = try Tag.identifier.create(c.arena, name);
@@ -1787,52 +1796,67 @@ fn transStringLiteral(
}
}
-/// Parse the size of an array back out from an ast Node.
-fn zigArraySize(c: *Context, node: Node) TransError!usize {
- if (node.castTag(.array_type)) |array| {
- return array.data.len;
- }
- return error.UnsupportedTranslation;
+/// Returns the element type and length stored in an array type node.
+/// `array_type` must be tagged `.array_type` or `.null_sentinel_array_type`;
+/// any other tag fails the `.?` unwrap.
+fn getArrayPayload(array_type: Node) ast.Payload.Array.ArrayTypeInfo {
+ if (array_type.castTag(.array_type)) |plain| return plain.data;
+ return array_type.castTag(.null_sentinel_array_type).?.data;
}
-/// Translate a string literal to an array of integers. Used when an
-/// array is initialized from a string literal. `array_size` is the
-/// size of the array being initialized. If the string literal is larger
-/// than the array, truncate the string. If the array is larger than the
-/// string literal, pad the array with 0's
-fn transStringLiteralAsArray(
+/// Translate a string literal that is initializing an array. `array_type` must be an
+/// `.array_type` or `.null_sentinel_array_type` node. A zero-length array yields an empty
+/// array. Narrow string literals become `"<string>".*` when the string length equals the
+/// array length, and `"<string>"[0..<size>].*` whenever the two lengths differ.
+/// Wide string literals become an array of integers. Zero-fillers pad out the array to
+/// the appropriate length, if necessary.
+fn transStringLiteralInitializer(
c: *Context,
scope: *Scope,
stmt: *const clang.StringLiteral,
- array_size: usize,
+ array_type: Node,
) TransError!Node {
- if (array_size == 0) return error.UnsupportedType;
+ assert(array_type.tag() == .array_type or array_type.tag() == .null_sentinel_array_type);
+
+ const is_narrow = stmt.getKind() == .Ascii or stmt.getKind() == .UTF8;
const str_length = stmt.getLength();
+ const payload = getArrayPayload(array_type);
+ const array_size = payload.len;
+ const elem_type = payload.elem_type;
+
+ if (array_size == 0) return Tag.empty_array.create(c.arena, elem_type); // zero-length array: empty initializer, the literal is ignored
+
+ const num_inits = math.min(str_length, array_size); // number of code units taken from the literal
+ const init_node = if (num_inits > 0) blk: {
+ if (is_narrow) {
+ // "string literal".* or "string literal"[0..num_inits].*
+ var str = try transNarrowStringLiteral(c, scope, stmt, .used);
+ if (str_length != array_size) str = try Tag.string_slice.create(c.arena, .{ .string = str, .end = num_inits });
+ break :blk try Tag.deref.create(c.arena, str);
+ } else {
+ const init_list = try c.arena.alloc(Node, num_inits);
+ var i: c_uint = 0;
+ while (i < num_inits) : (i += 1) {
+ init_list[i] = try transCreateCharLitNode(c, false, stmt.getCodeUnit(i));
+ }
+ const init_args = .{ .len = num_inits, .elem_type = elem_type }; // array type covering only the initialized prefix
+ const init_array_type = try if (array_type.tag() == .array_type) Tag.array_type.create(c.arena, init_args) else Tag.null_sentinel_array_type.create(c.arena, init_args);
+ break :blk try Tag.array_init.create(c.arena, .{
+ .cond = init_array_type,
+ .cases = init_list,
+ });
+ }
+ } else null;
- const expr_base = @ptrCast(*const clang.Expr, stmt);
- const ty = expr_base.getType().getTypePtr();
- const const_arr_ty = @ptrCast(*const clang.ConstantArrayType, ty);
+ if (num_inits == array_size) return init_node.?; // init_node is only null if num_inits == 0; but if num_inits == array_size == 0 we've already returned
+ assert(array_size > str_length); // If array_size <= str_length, `num_inits == array_size` and we've already returned.
- const elem_type = try transQualType(c, scope, const_arr_ty.getElementType(), expr_base.getBeginLoc());
- const arr_type = try Tag.array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_type });
- const init_list = try c.arena.alloc(Node, array_size);
+ const filler_node = try Tag.array_filler.create(c.arena, .{ // zero filler for the elements the literal does not cover
+ .type = elem_type,
+ .filler = Tag.zero_literal.init(),
+ .count = array_size - str_length,
+ });
- var i: c_uint = 0;
- const kind = stmt.getKind();
- const narrow = kind == .Ascii or kind == .UTF8;
- while (i < str_length and i < array_size) : (i += 1) {
- const code_unit = stmt.getCodeUnit(i);
- init_list[i] = try transCreateCharLitNode(c, narrow, code_unit);
- }
- while (i < array_size) : (i += 1) {
- init_list[i] = try transCreateNodeNumber(c, 0, .int);
+ if (init_node) |some| {
+ return Tag.array_cat.create(c.arena, .{ .lhs = some, .rhs = filler_node });
+ } else {
+ return filler_node;
}
-
- return Tag.array_init.create(c.arena, .{
- .cond = arr_type,
- .cases = init_list,
- });
}
/// determine whether `stmt` is a "pointer subtraction expression" - a subtraction where
@@ -3342,9 +3366,8 @@ fn addTopLevelDecl(c: *Context, name: []const u8, decl_node: Node) !void {
try c.global_scope.nodes.append(decl_node);
}
-/// Translate a qual type for a variable with an initializer. The initializer
-/// only matters for incomplete arrays, since the size of the array is determined
-/// by the size of the initializer
+/// Translate a qualtype for a variable with an initializer. This only matters
+/// for incomplete arrays, since the initializer determines the size of the array.
fn transQualTypeInitialized(
c: *Context,
scope: *Scope,
@@ -3360,9 +3383,14 @@ fn transQualTypeInitialized(
switch (decl_init.getStmtClass()) {
.StringLiteralClass => {
const string_lit = @ptrCast(*const clang.StringLiteral, decl_init);
- const string_lit_size = string_lit.getLength() + 1; // +1 for null terminator
+ const string_lit_size = string_lit.getLength();
const array_size = @intCast(usize, string_lit_size);
- return Tag.array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_ty });
+
+ // incomplete array initialized with empty string, will be translated as [1]T{0}
+ // see https://github.com/ziglang/zig/issues/8256
+ if (array_size == 0) return Tag.array_type.create(c.arena, .{ .len = 1, .elem_type = elem_ty });
+
+ return Tag.null_sentinel_array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_ty });
},
.InitListExprClass => {
const init_expr = @ptrCast(*const clang.InitListExpr, decl_init);
test/translate_c.zig
@@ -745,14 +745,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
\\ static const char v2[] = "2.2.2";
\\}
, &[_][]const u8{
- \\const v2: [6]u8 = [6]u8{
- \\ '2',
- \\ '.',
- \\ '2',
- \\ '.',
- \\ '2',
- \\ 0,
- \\};
+ \\const v2: [5:0]u8 = "2.2.2".*;
\\pub export fn foo() void {}
});
@@ -1600,30 +1593,9 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
\\static char arr1[] = "hello";
\\char arr2[] = "hello";
, &[_][]const u8{
- \\pub export var arr0: [6]u8 = [6]u8{
- \\ 'h',
- \\ 'e',
- \\ 'l',
- \\ 'l',
- \\ 'o',
- \\ 0,
- \\};
- \\pub var arr1: [6]u8 = [6]u8{
- \\ 'h',
- \\ 'e',
- \\ 'l',
- \\ 'l',
- \\ 'o',
- \\ 0,
- \\};
- \\pub export var arr2: [6]u8 = [6]u8{
- \\ 'h',
- \\ 'e',
- \\ 'l',
- \\ 'l',
- \\ 'o',
- \\ 0,
- \\};
+ \\pub export var arr0: [5:0]u8 = "hello".*;
+ \\pub var arr1: [5:0]u8 = "hello".*;
+ \\pub export var arr2: [5:0]u8 = "hello".*;
});
cases.add("array initializer expr",
@@ -3425,4 +3397,49 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
, &[_][]const u8{
\\pub const FOO = @compileError("TODO implement function '__builtin_alloca_with_align' in std.c.builtins");
});
+
+ cases.add("null sentinel arrays when initialized from string literal. Issue #8256",
+ \\#include <stdint.h>
+ \\char zero[0] = "abc";
+ \\uint32_t zero_w[0] = U"💯💯💯";
+ \\char empty_incomplete[] = "";
+ \\uint32_t empty_incomplete_w[] = U"";
+ \\char empty_constant[100] = "";
+ \\uint32_t empty_constant_w[100] = U"";
+ \\char incomplete[] = "abc";
+ \\uint32_t incomplete_w[] = U"💯💯💯";
+ \\char truncated[1] = "abc";
+ \\uint32_t truncated_w[1] = U"💯💯💯";
+ \\char extend[5] = "a";
+ \\uint32_t extend_w[5] = U"💯";
+ \\char no_null[3] = "abc";
+ \\uint32_t no_null_w[3] = U"💯💯💯";
+ , &[_][]const u8{
+ \\pub export var zero: [0]u8 = [0]u8{};
+ \\pub export var zero_w: [0]u32 = [0]u32{};
+ \\pub export var empty_incomplete: [1]u8 = [1]u8{0} ** 1;
+ \\pub export var empty_incomplete_w: [1]u32 = [1]u32{0} ** 1;
+ \\pub export var empty_constant: [100]u8 = [1]u8{0} ** 100;
+ \\pub export var empty_constant_w: [100]u32 = [1]u32{0} ** 100;
+ \\pub export var incomplete: [3:0]u8 = "abc".*;
+ \\pub export var incomplete_w: [3:0]u32 = [3:0]u32{
+ \\ '\u{1f4af}',
+ \\ '\u{1f4af}',
+ \\ '\u{1f4af}',
+ \\};
+ \\pub export var truncated: [1]u8 = "abc"[0..1].*;
+ \\pub export var truncated_w: [1]u32 = [1]u32{
+ \\ '\u{1f4af}',
+ \\};
+ \\pub export var extend: [5]u8 = "a"[0..1].* ++ [1]u8{0} ** 4;
+ \\pub export var extend_w: [5]u32 = [1]u32{
+ \\ '\u{1f4af}',
+ \\} ++ [1]u32{0} ** 4;
+ \\pub export var no_null: [3]u8 = "abc".*;
+ \\pub export var no_null_w: [3]u32 = [3]u32{
+ \\ '\u{1f4af}',
+ \\ '\u{1f4af}',
+ \\ '\u{1f4af}',
+ \\};
+ });
}