Commit 8187396f64
Changed files (6)
src/ast_render.cpp
@@ -239,10 +239,80 @@ static bool is_node_void(AstNode *node) {
return false;
}
-static bool is_printable(uint8_t c) {
+static bool is_alpha_under(uint8_t c) { // true for ASCII 'a'-'z', 'A'-'Z' and '_'
return (c >= 'a' && c <= 'z') ||
- (c >= 'A' && c <= 'A') ||
- (c >= '0' && c <= '9');
+ (c >= 'A' && c <= 'Z') || c == '_';
+}
+
+// Reports whether `c` is an ASCII decimal digit ('0' through '9').
+static bool is_digit(uint8_t c) {
+    if (c < '0') {
+        return false;
+    }
+    return c <= '9';
+}
+
+// Reports whether `c` is an ASCII letter, an underscore, or an ASCII digit.
+static bool is_printable(uint8_t c) {
+    if (is_digit(c)) {
+        return true;
+    }
+    return is_alpha_under(c);
+}
+
+// Writes into `dest` an escaped copy of `source` meant to be placed between
+// double quotes in rendered output. `dest` is truncated to length 0 first.
+//
+// ASCII letters, digits and '_' are copied through unchanged. Only escape
+// sequences that parse_string_literal (parser.cpp) can read back are emitted:
+// \" \\ \n \r \t and \xNN. Every other byte, including the quote character
+// ' and the control characters \a \b \f \v, is written as \xNN, because the
+// parser reports "invalid escape character" for any other letter after a
+// backslash.
+static void string_literal_escape(Buf *source, Buf *dest) {
+    buf_resize(dest, 0);
+    for (int i = 0; i < buf_len(source); i += 1) {
+        uint8_t c = *((uint8_t*)buf_ptr(source) + i);
+        if (is_printable(c)) {
+            buf_append_char(dest, c);
+            continue;
+        }
+        switch (c) {
+            case '"':
+                buf_append_str(dest, "\\\"");
+                break;
+            case '\\':
+                buf_append_str(dest, "\\\\");
+                break;
+            case '\n':
+                buf_append_str(dest, "\\n");
+                break;
+            case '\r':
+                buf_append_str(dest, "\\r");
+                break;
+            case '\t':
+                buf_append_str(dest, "\\t");
+                break;
+            default:
+                // Always two digits: the parser consumes exactly two hex
+                // digits after \x, so a byte below 0x10 written as a single
+                // digit would swallow the character that follows it.
+                buf_appendf(dest, "\\x%02x", (int)c);
+                break;
+        }
+    }
+}
+
+// Reports whether `symbol` is non-empty, starts with an ASCII letter or '_',
+// and continues with only ASCII letters, digits or '_'. print_symbol writes
+// such names without quoting.
+static bool is_valid_bare_symbol(Buf *symbol) {
+    int len = buf_len(symbol);
+    if (len == 0) {
+        return false;
+    }
+    uint8_t *bytes = (uint8_t*)buf_ptr(symbol);
+    for (int i = 0; i < len; i += 1) {
+        uint8_t c = bytes[i];
+        // Digits are accepted everywhere except the leading position.
+        bool ok = is_alpha_under(c) || (i > 0 && is_digit(c));
+        if (!ok) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Writes `symbol` to the render output stream `ar->f`.
+//  - A Zig keyword is written verbatim inside @"...".
+//  - A name accepted by is_valid_bare_symbol is written as-is.
+//  - Anything else is run through string_literal_escape and written inside
+//    @"...".
+static void print_symbol(AstRender *ar, Buf *symbol) {
+    if (is_zig_keyword(symbol)) {
+        fprintf(ar->f, "@\"%s\"", buf_ptr(symbol));
+        return;
+    }
+    if (!is_valid_bare_symbol(symbol)) {
+        // NOTE(review): quoted_name is never released here; confirm whether
+        // that is intentional.
+        Buf quoted_name = BUF_INIT;
+        string_literal_escape(symbol, &quoted_name);
+        fprintf(ar->f, "@\"%s\"", buf_ptr(&quoted_name));
+        return;
+    }
+    fprintf(ar->f, "%s", buf_ptr(symbol));
}
static void render_node(AstRender *ar, AstNode *node) {
@@ -268,20 +338,22 @@ static void render_node(AstRender *ar, AstNode *node) {
break;
case NodeTypeFnProto:
{
- const char *fn_name = buf_ptr(&node->data.fn_proto.name);
const char *pub_str = visib_mod_string(node->data.fn_proto.top_level_decl.visib_mod);
const char *extern_str = extern_string(node->data.fn_proto.is_extern);
const char *inline_str = inline_string(node->data.fn_proto.is_inline);
- fprintf(ar->f, "%s%s%sfn %s(", pub_str, inline_str, extern_str, fn_name);
+ fprintf(ar->f, "%s%s%sfn ", pub_str, inline_str, extern_str);
+ print_symbol(ar, &node->data.fn_proto.name);
+ fprintf(ar->f, "(");
int arg_count = node->data.fn_proto.params.length;
bool is_var_args = node->data.fn_proto.is_var_args;
for (int arg_i = 0; arg_i < arg_count; arg_i += 1) {
AstNode *param_decl = node->data.fn_proto.params.at(arg_i);
assert(param_decl->type == NodeTypeParamDecl);
- const char *arg_name = buf_ptr(¶m_decl->data.param_decl.name);
if (buf_len(¶m_decl->data.param_decl.name) > 0) {
const char *noalias_str = param_decl->data.param_decl.is_noalias ? "noalias " : "";
- fprintf(ar->f, "%s%s: ", noalias_str, arg_name);
+ fprintf(ar->f, "%s", noalias_str);
+ print_symbol(ar, ¶m_decl->data.param_decl.name);
+ fprintf(ar->f, ": ");
}
render_node(ar, param_decl->data.param_decl.type);
@@ -345,9 +417,10 @@ static void render_node(AstRender *ar, AstNode *node) {
{
const char *pub_str = visib_mod_string(node->data.variable_declaration.top_level_decl.visib_mod);
const char *extern_str = extern_string(node->data.variable_declaration.is_extern);
- const char *var_name = buf_ptr(&node->data.variable_declaration.symbol);
const char *const_or_var = const_or_var_string(node->data.variable_declaration.is_const);
- fprintf(ar->f, "%s%s%s %s", pub_str, extern_str, const_or_var, var_name);
+ fprintf(ar->f, "%s%s%s ", pub_str, extern_str, const_or_var);
+ print_symbol(ar, &node->data.variable_declaration.symbol);
+
if (node->data.variable_declaration.type) {
fprintf(ar->f, ": ");
render_node(ar, node->data.variable_declaration.type);
@@ -495,9 +568,8 @@ static void render_node(AstRender *ar, AstNode *node) {
for (int field_i = 0; field_i < node->data.struct_decl.fields.length; field_i += 1) {
AstNode *field_node = node->data.struct_decl.fields.at(field_i);
assert(field_node->type == NodeTypeStructField);
- const char *field_name = buf_ptr(&field_node->data.struct_field.name);
print_indent(ar);
- fprintf(ar->f, "%s", field_name);
+ print_symbol(ar, &field_node->data.struct_field.name);
if (!is_node_void(field_node->data.struct_field.type)) {
fprintf(ar->f, ": ");
render_node(ar, field_node->data.struct_field.type);
src/parseh.cpp
@@ -11,7 +11,6 @@
#include "error.hpp"
#include "parser.hpp"
#include "all_types.hpp"
-#include "tokenizer.hpp"
#include "c_tokenizer.hpp"
#include "analyze.hpp"
@@ -1265,10 +1264,6 @@ static void render_macros(Context *c) {
}
static void process_macro(Context *c, CTokenize *ctok, Buf *name, const char *char_ptr) {
- if (is_zig_keyword(name)) {
- return;
- }
-
tokenize_c_macro(ctok, (const uint8_t *)char_ptr);
if (ctok->error) {
src/parser.cpp
@@ -87,10 +87,6 @@ static AstNode *ast_create_void_type_node(ParseContext *pc, Token *token) {
return node;
}
-static void ast_buf_from_token(ParseContext *pc, Token *token, Buf *buf) {
- buf_init_from_mem(buf, buf_ptr(pc->buf) + token->start_pos, token->end_pos - token->start_pos);
-}
-
static void parse_asm_template(ParseContext *pc, AstNode *node) {
Buf *asm_template = &node->data.asm_expr.asm_template;
@@ -277,6 +273,8 @@ static void parse_string_literal(ParseContext *pc, Token *token, Buf *buf, bool
// detect c string literal
enum State {
+ StatePre,
+ StateSkipQuot,
StateStart,
StateEscape,
StateHex1,
@@ -285,90 +283,100 @@ static void parse_string_literal(ParseContext *pc, Token *token, Buf *buf, bool
buf_resize(buf, 0);
- State state = StateStart;
- bool skip_quote;
+ State state = StatePre;
SrcPos pos = {token->start_line, token->start_column};
int hex_value = 0;
for (int i = token->start_pos; i < token->end_pos - 1; i += 1) {
uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + i);
- if (i == token->start_pos) {
- skip_quote = (c == 'c');
- if (out_c_str) {
- *out_c_str = skip_quote;
- } else if (skip_quote) {
- ast_error(pc, token, "C string literal not allowed here");
- }
- } else if (skip_quote) {
- skip_quote = false;
- } else {
- switch (state) {
- case StateStart:
- if (c == '\\') {
- state = StateEscape;
- } else {
- buf_append_char(buf, c);
- if (offset_map) offset_map->append(pos);
- }
- break;
- case StateEscape:
- switch (c) {
- case '\\':
- buf_append_char(buf, '\\');
- if (offset_map) offset_map->append(pos);
- state = StateStart;
- break;
- case 'r':
- buf_append_char(buf, '\r');
- if (offset_map) offset_map->append(pos);
- state = StateStart;
- break;
- case 'n':
- buf_append_char(buf, '\n');
- if (offset_map) offset_map->append(pos);
- state = StateStart;
- break;
- case 't':
- buf_append_char(buf, '\t');
- if (offset_map) offset_map->append(pos);
- state = StateStart;
- break;
- case '"':
- buf_append_char(buf, '"');
- if (offset_map) offset_map->append(pos);
- state = StateStart;
- break;
- case 'x':
- state = StateHex1;
- break;
- default:
- ast_error(pc, token, "invalid escape character");
- break;
- }
- break;
- case StateHex1:
- {
- int hex_digit = get_hex_digit(c);
- if (hex_digit == -1) {
- ast_error(pc, token, "invalid hex digit: '%c'", c);
- }
- hex_value = hex_digit * 16;
- state = StateHex2;
+ switch (state) {
+ case StatePre:
+ switch (c) {
+ case '@':
+ state = StateSkipQuot;
break;
- }
- case StateHex2:
- {
- int hex_digit = get_hex_digit(c);
- if (hex_digit == -1) {
- ast_error(pc, token, "invalid hex digit: '%c'", c);
+ case 'c':
+ if (out_c_str) {
+ *out_c_str = true;
+ } else {
+ ast_error(pc, token, "C string literal not allowed here");
}
- hex_value += hex_digit;
- assert(hex_value >= 0 && hex_value <= 255);
- buf_append_char(buf, hex_value);
+ state = StateSkipQuot;
+ break;
+ case '"':
state = StateStart;
break;
+ default:
+ ast_error(pc, token, "invalid string character");
+ }
+ break;
+ case StateSkipQuot:
+ state = StateStart;
+ break;
+ case StateStart:
+ if (c == '\\') {
+ state = StateEscape;
+ } else {
+ buf_append_char(buf, c);
+ if (offset_map) offset_map->append(pos);
+ }
+ break;
+ case StateEscape:
+ switch (c) {
+ case '\\':
+ buf_append_char(buf, '\\');
+ if (offset_map) offset_map->append(pos);
+ state = StateStart;
+ break;
+ case 'r':
+ buf_append_char(buf, '\r');
+ if (offset_map) offset_map->append(pos);
+ state = StateStart;
+ break;
+ case 'n':
+ buf_append_char(buf, '\n');
+ if (offset_map) offset_map->append(pos);
+ state = StateStart;
+ break;
+ case 't':
+ buf_append_char(buf, '\t');
+ if (offset_map) offset_map->append(pos);
+ state = StateStart;
+ break;
+ case '"':
+ buf_append_char(buf, '"');
+ if (offset_map) offset_map->append(pos);
+ state = StateStart;
+ break;
+ case 'x':
+ state = StateHex1;
+ break;
+ default:
+ ast_error(pc, token, "invalid escape character");
+ }
+ break;
+ case StateHex1:
+ {
+ int hex_digit = get_hex_digit(c);
+ if (hex_digit == -1) {
+ ast_error(pc, token, "invalid hex digit: '%c'", c);
}
- }
+ hex_value = hex_digit * 16;
+ state = StateHex2;
+ break;
+ }
+ case StateHex2:
+ {
+ int hex_digit = get_hex_digit(c);
+ if (hex_digit == -1) {
+ ast_error(pc, token, "invalid hex digit: '%c'", c);
+ }
+ hex_value += hex_digit;
+ assert(hex_value >= 0 && hex_value <= 255);
+ buf_append_char(buf, hex_value);
+ state = StateStart;
+ break;
+ }
}
if (c == '\n') {
pos.line += 1;
@@ -381,6 +389,17 @@ static void parse_string_literal(ParseContext *pc, Token *token, Buf *buf, bool
if (offset_map) offset_map->append(pos);
}
+// Fills `buf` with the text of `token`. A token whose first source byte is
+// '@' is decoded through parse_string_literal; any other token is copied
+// byte-for-byte from the source buffer.
+static void ast_buf_from_token(ParseContext *pc, Token *token, Buf *buf) {
+    char *token_text = buf_ptr(pc->buf) + token->start_pos;
+    if (*token_text != '@') {
+        buf_init_from_mem(buf, token_text, token->end_pos - token->start_pos);
+        return;
+    }
+    parse_string_literal(pc, token, buf, nullptr, nullptr);
+}
+
+
static unsigned long long parse_int_digits(ParseContext *pc, int digits_start, int digits_end, int radix,
int skip_index, bool *overflow)
{
src/tokenizer.cpp
@@ -159,6 +159,7 @@ enum TokenizeState {
TokenizeStateSawDot,
TokenizeStateSawDotDot,
TokenizeStateSawQuestionMark,
+ TokenizeStateSawAtSign,
TokenizeStateError,
};
@@ -429,7 +430,7 @@ void tokenize(Buf *buf, Tokenization *out) {
break;
case '@':
begin_token(&t, TokenIdAtSign);
- end_token(&t);
+ t.state = TokenizeStateSawAtSign;
break;
case '-':
begin_token(&t, TokenIdDash);
@@ -858,6 +859,19 @@ void tokenize(Buf *buf, Tokenization *out) {
continue;
}
break;
+ case TokenizeStateSawAtSign:
+ switch (c) {
+ case '"':
+ t.cur_tok->id = TokenIdSymbol;
+ t.state = TokenizeStateString;
+ break;
+ default:
+ t.pos -= 1;
+ end_token(&t);
+ t.state = TokenizeStateStart;
+ continue;
+ }
+ break;
case TokenizeStateFirstR:
switch (c) {
case '"':
@@ -1131,6 +1145,7 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawGreaterThanGreaterThan:
case TokenizeStateSawDot:
case TokenizeStateSawQuestionMark:
+ case TokenizeStateSawAtSign:
end_token(&t);
break;
case TokenizeStateSawDotDot:
test/run_tests.cpp
@@ -1394,6 +1394,14 @@ void foo(void (__cdecl *fn_ptr)(void));
add_parseh_case("comment after integer literal", R"SOURCE(
#define SDL_INIT_VIDEO 0x00000020 /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */
)SOURCE", 1, "pub const SDL_INIT_VIDEO = 32;");
+
+ add_parseh_case("zig keywords in C code", R"SOURCE(
+struct type {
+ int defer;
+};
+ )SOURCE", 2, R"(export struct struct_type {
+ @"defer": c_int,
+})", R"(pub const @"type" = struct_type;)");
}
static void run_self_hosted_test(void) {
test/self_hosted.zig
@@ -1295,3 +1295,7 @@ struct EmptyStruct {
#static_eval_enable(false)
fn method(es: EmptyStruct) -> i32 { 1234 }
}
+
+
+#attribute("test")
+fn @"weird function name"() { } // declares a function whose name is a quoted @"..." identifier containing spaces