Commit 8187396f64

Andrew Kelley <superjoe30@gmail.com>
2016-04-23 00:24:18
add syntax to allow symbols to have arbitrary strings as names
1 parent 35362f8
src/ast_render.cpp
@@ -239,10 +239,80 @@ static bool is_node_void(AstNode *node) {
     return false;
 }
 
-static bool is_printable(uint8_t c) {
+static bool is_alpha_under(uint8_t c) {
     return (c >= 'a' && c <= 'z') ||
-           (c >= 'A' && c <= 'A') ||
-           (c >= '0' && c <= '9');
+        (c >= 'A' && c <= 'Z') || c == '_';
+}
+
+static bool is_digit(uint8_t c) {  // true only for the ASCII decimal digits '0'..'9'
+    return (c >= '0' && c <= '9');
+}
+
+static bool is_printable(uint8_t c) {  // despite the name: true only for ASCII letters, digits and '_'
+    return is_alpha_under(c) || is_digit(c);  // space and punctuation therefore count as not printable
+}
+
+static void string_literal_escape(Buf *source, Buf *dest) {  // replaces dest's contents with an escaped copy of source
+    buf_resize(dest, 0);
+    for (int i = 0; i < buf_len(source); i += 1) {
+        uint8_t c = *((uint8_t*)buf_ptr(source) + i);
+        if (is_printable(c)) {  // ASCII letters, digits and '_' are copied through unchanged
+            buf_append_char(dest, c);
+        } else if (c == '\'') {  // NOTE(review): StateEscape in parse_string_literal accepts only \\ r n t " x, so the ' a b f v escapes below may not parse back -- confirm
+            buf_append_str(dest, "\\'");
+        } else if (c == '"') {
+            buf_append_str(dest, "\\\"");
+        } else if (c == '\\') {
+            buf_append_str(dest, "\\\\");
+        } else if (c == '\a') {
+            buf_append_str(dest, "\\a");
+        } else if (c == '\b') {
+            buf_append_str(dest, "\\b");
+        } else if (c == '\f') {
+            buf_append_str(dest, "\\f");
+        } else if (c == '\n') {
+            buf_append_str(dest, "\\n");
+        } else if (c == '\r') {
+            buf_append_str(dest, "\\r");
+        } else if (c == '\t') {
+            buf_append_str(dest, "\\t");
+        } else if (c == '\v') {
+            buf_append_str(dest, "\\v");
+        } else {  // every other byte, including space and punctuation, becomes a hex escape
+            buf_appendf(dest, "\\x%02x", (int)c);  // always two digits: StateHex1/StateHex2 in parse_string_literal consume exactly two
+        }
+    }
+}
+
+static bool is_valid_bare_symbol(Buf *symbol) {  // true when symbol matches [A-Za-z_][A-Za-z0-9_]*
+    if (buf_len(symbol) == 0) {  // the empty name is never a bare symbol
+        return false;
+    }
+    uint8_t first_char = *buf_ptr(symbol);
+    if (!is_alpha_under(first_char)) {  // a leading digit is rejected here
+        return false;
+    }
+    for (int i = 1; i < buf_len(symbol); i += 1) {  // every later byte may also be a digit
+        uint8_t c = *((uint8_t*)buf_ptr(symbol) + i);
+        if (!is_alpha_under(c) && !is_digit(c)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+static void print_symbol(AstRender *ar, Buf *symbol) {  // writes symbol to ar->f: bare when possible, otherwise as @"..."
+    if (is_zig_keyword(symbol)) {  // checked first, so a name for which is_zig_keyword is true is always quoted
+        fprintf(ar->f, "@\"%s\"", buf_ptr(symbol));  // written verbatim, without going through string_literal_escape
+        return;
+    }
+    if (is_valid_bare_symbol(symbol)) {
+        fprintf(ar->f, "%s", buf_ptr(symbol));
+        return;
+    }
+    Buf escaped = BUF_INIT;  // NOTE(review): not released in this function -- confirm whether Buf needs explicit cleanup
+    string_literal_escape(symbol, &escaped);
+    fprintf(ar->f, "@\"%s\"", buf_ptr(&escaped));
 }
 
 static void render_node(AstRender *ar, AstNode *node) {
@@ -268,20 +338,22 @@ static void render_node(AstRender *ar, AstNode *node) {
             break;
         case NodeTypeFnProto:
             {
-                const char *fn_name = buf_ptr(&node->data.fn_proto.name);
                 const char *pub_str = visib_mod_string(node->data.fn_proto.top_level_decl.visib_mod);
                 const char *extern_str = extern_string(node->data.fn_proto.is_extern);
                 const char *inline_str = inline_string(node->data.fn_proto.is_inline);
-                fprintf(ar->f, "%s%s%sfn %s(", pub_str, inline_str, extern_str, fn_name);
+                fprintf(ar->f, "%s%s%sfn ", pub_str, inline_str, extern_str);
+                print_symbol(ar, &node->data.fn_proto.name);
+                fprintf(ar->f, "(");
                 int arg_count = node->data.fn_proto.params.length;
                 bool is_var_args = node->data.fn_proto.is_var_args;
                 for (int arg_i = 0; arg_i < arg_count; arg_i += 1) {
                     AstNode *param_decl = node->data.fn_proto.params.at(arg_i);
                     assert(param_decl->type == NodeTypeParamDecl);
-                    const char *arg_name = buf_ptr(&param_decl->data.param_decl.name);
                     if (buf_len(&param_decl->data.param_decl.name) > 0) {
                         const char *noalias_str = param_decl->data.param_decl.is_noalias ? "noalias " : "";
-                        fprintf(ar->f, "%s%s: ", noalias_str, arg_name);
+                        fprintf(ar->f, "%s", noalias_str);
+                        print_symbol(ar, &param_decl->data.param_decl.name);
+                        fprintf(ar->f, ": ");
                     }
                     render_node(ar, param_decl->data.param_decl.type);
 
@@ -345,9 +417,10 @@ static void render_node(AstRender *ar, AstNode *node) {
             {
                 const char *pub_str = visib_mod_string(node->data.variable_declaration.top_level_decl.visib_mod);
                 const char *extern_str = extern_string(node->data.variable_declaration.is_extern);
-                const char *var_name = buf_ptr(&node->data.variable_declaration.symbol);
                 const char *const_or_var = const_or_var_string(node->data.variable_declaration.is_const);
-                fprintf(ar->f, "%s%s%s %s", pub_str, extern_str, const_or_var, var_name);
+                fprintf(ar->f, "%s%s%s ", pub_str, extern_str, const_or_var);
+                print_symbol(ar, &node->data.variable_declaration.symbol);
+
                 if (node->data.variable_declaration.type) {
                     fprintf(ar->f, ": ");
                     render_node(ar, node->data.variable_declaration.type);
@@ -495,9 +568,8 @@ static void render_node(AstRender *ar, AstNode *node) {
                 for (int field_i = 0; field_i < node->data.struct_decl.fields.length; field_i += 1) {
                     AstNode *field_node = node->data.struct_decl.fields.at(field_i);
                     assert(field_node->type == NodeTypeStructField);
-                    const char *field_name = buf_ptr(&field_node->data.struct_field.name);
                     print_indent(ar);
-                    fprintf(ar->f, "%s", field_name);
+                    print_symbol(ar, &field_node->data.struct_field.name);
                     if (!is_node_void(field_node->data.struct_field.type)) {
                         fprintf(ar->f, ": ");
                         render_node(ar, field_node->data.struct_field.type);
src/parseh.cpp
@@ -11,7 +11,6 @@
 #include "error.hpp"
 #include "parser.hpp"
 #include "all_types.hpp"
-#include "tokenizer.hpp"
 #include "c_tokenizer.hpp"
 #include "analyze.hpp"
 
@@ -1265,10 +1264,6 @@ static void render_macros(Context *c) {
 }
 
 static void process_macro(Context *c, CTokenize *ctok, Buf *name, const char *char_ptr) {
-    if (is_zig_keyword(name)) {
-        return;
-    }
-
     tokenize_c_macro(ctok, (const uint8_t *)char_ptr);
 
     if (ctok->error) {
src/parser.cpp
@@ -87,10 +87,6 @@ static AstNode *ast_create_void_type_node(ParseContext *pc, Token *token) {
     return node;
 }
 
-static void ast_buf_from_token(ParseContext *pc, Token *token, Buf *buf) {
-    buf_init_from_mem(buf, buf_ptr(pc->buf) + token->start_pos, token->end_pos - token->start_pos);
-}
-
 static void parse_asm_template(ParseContext *pc, AstNode *node) {
     Buf *asm_template = &node->data.asm_expr.asm_template;
 
@@ -277,6 +273,8 @@ static void parse_string_literal(ParseContext *pc, Token *token, Buf *buf, bool
     // detect c string literal
 
     enum State {
+        StatePre,
+        StateSkipQuot,
         StateStart,
         StateEscape,
         StateHex1,
@@ -285,90 +283,100 @@ static void parse_string_literal(ParseContext *pc, Token *token, Buf *buf, bool
 
     buf_resize(buf, 0);
 
-    State state = StateStart;
-    bool skip_quote;
+    State state = StatePre;
     SrcPos pos = {token->start_line, token->start_column};
     int hex_value = 0;
     for (int i = token->start_pos; i < token->end_pos - 1; i += 1) {
         uint8_t c = *((uint8_t*)buf_ptr(pc->buf) + i);
 
-        if (i == token->start_pos) {
-            skip_quote = (c == 'c');
-            if (out_c_str) {
-                *out_c_str = skip_quote;
-            } else if (skip_quote) {
-                ast_error(pc, token, "C string literal not allowed here");
-            }
-        } else if (skip_quote) {
-            skip_quote = false;
-        } else {
-            switch (state) {
-                case StateStart:
-                    if (c == '\\') {
-                        state = StateEscape;
-                    } else {
-                        buf_append_char(buf, c);
-                        if (offset_map) offset_map->append(pos);
-                    }
-                    break;
-                case StateEscape:
-                    switch (c) {
-                        case '\\':
-                            buf_append_char(buf, '\\');
-                            if (offset_map) offset_map->append(pos);
-                            state = StateStart;
-                            break;
-                        case 'r':
-                            buf_append_char(buf, '\r');
-                            if (offset_map) offset_map->append(pos);
-                            state = StateStart;
-                            break;
-                        case 'n':
-                            buf_append_char(buf, '\n');
-                            if (offset_map) offset_map->append(pos);
-                            state = StateStart;
-                            break;
-                        case 't':
-                            buf_append_char(buf, '\t');
-                            if (offset_map) offset_map->append(pos);
-                            state = StateStart;
-                            break;
-                        case '"':
-                            buf_append_char(buf, '"');
-                            if (offset_map) offset_map->append(pos);
-                            state = StateStart;
-                            break;
-                        case 'x':
-                            state = StateHex1;
-                            break;
-                        default:
-                            ast_error(pc, token, "invalid escape character");
-                            break;
-                    }
-                    break;
-                case StateHex1:
-                    {
-                        int hex_digit = get_hex_digit(c);
-                        if (hex_digit == -1) {
-                            ast_error(pc, token, "invalid hex digit: '%c'", c);
-                        }
-                        hex_value = hex_digit * 16;
-                        state = StateHex2;
+        switch (state) {
+            case StatePre:
+                switch (c) {
+                    case '@':
+                        state = StateSkipQuot;
                         break;
-                    }
-                case StateHex2:
-                    {
-                        int hex_digit = get_hex_digit(c);
-                        if (hex_digit == -1) {
-                            ast_error(pc, token, "invalid hex digit: '%c'", c);
+                    case 'c':
+                        if (out_c_str) {
+                            *out_c_str = true;
+                        } else {
+                            ast_error(pc, token, "C string literal not allowed here");
                         }
-                        hex_value += hex_digit;
-                        assert(hex_value >= 0 && hex_value <= 255);
-                        buf_append_char(buf, hex_value);
+                        state = StateSkipQuot;
+                        break;
+                    case '"':
                         state = StateStart;
                         break;
+                    default:
+                        ast_error(pc, token, "invalid string character");
+                }
+                break;
+            case StateSkipQuot:
+                state = StateStart;
+                break;
+            case StateStart:
+                if (c == '\\') {
+                    state = StateEscape;
+                } else {
+                    buf_append_char(buf, c);
+                    if (offset_map) offset_map->append(pos);
+                }
+                break;
+            case StateEscape:
+                switch (c) {
+                    case '\\':
+                        buf_append_char(buf, '\\');
+                        if (offset_map) offset_map->append(pos);
+                        state = StateStart;
+                        break;
+                    case 'r':
+                        buf_append_char(buf, '\r');
+                        if (offset_map) offset_map->append(pos);
+                        state = StateStart;
+                        break;
+                    case 'n':
+                        buf_append_char(buf, '\n');
+                        if (offset_map) offset_map->append(pos);
+                        state = StateStart;
+                        break;
+                    case 't':
+                        buf_append_char(buf, '\t');
+                        if (offset_map) offset_map->append(pos);
+                        state = StateStart;
+                        break;
+                    case '"':
+                        buf_append_char(buf, '"');
+                        if (offset_map) offset_map->append(pos);
+                        state = StateStart;
+                        break;
+                    case 'x':
+                        state = StateHex1;
+                        break;
+                    default:
+                        ast_error(pc, token, "invalid escape character");
+                }
+                break;
+            case StateHex1:
+                {
+                    int hex_digit = get_hex_digit(c);
+                    if (hex_digit == -1) {
+                        ast_error(pc, token, "invalid hex digit: '%c'", c);
                     }
-            }
+                    hex_value = hex_digit * 16;
+                    state = StateHex2;
+                    break;
+                }
+            case StateHex2:
+                {
+                    int hex_digit = get_hex_digit(c);
+                    if (hex_digit == -1) {
+                        ast_error(pc, token, "invalid hex digit: '%c'", c);
+                    }
+                    hex_value += hex_digit;
+                    assert(hex_value >= 0 && hex_value <= 255);
+                    buf_append_char(buf, hex_value);
+                    state = StateStart;
+                    break;
+                }
         }
         if (c == '\n') {
             pos.line += 1;
@@ -381,6 +389,17 @@ static void parse_string_literal(ParseContext *pc, Token *token, Buf *buf, bool
     if (offset_map) offset_map->append(pos);
 }
 
+static void ast_buf_from_token(ParseContext *pc, Token *token, Buf *buf) {  // fills buf from the source text covered by token
+    uint8_t *first_char = (uint8_t *)buf_ptr(pc->buf) + token->start_pos;
+    bool at_sign = *first_char == '@';  // a token whose first byte is '@' is decoded as a string literal
+    if (at_sign) {
+        parse_string_literal(pc, token, buf, nullptr, nullptr);  // nullptr is passed for both trailing pointer arguments
+    } else {
+        buf_init_from_mem(buf, buf_ptr(pc->buf) + token->start_pos, token->end_pos - token->start_pos);  // raw bytes from start_pos up to end_pos
+    }
+}
+
+
 static unsigned long long parse_int_digits(ParseContext *pc, int digits_start, int digits_end, int radix,
     int skip_index, bool *overflow)
 {
src/tokenizer.cpp
@@ -159,6 +159,7 @@ enum TokenizeState {
     TokenizeStateSawDot,
     TokenizeStateSawDotDot,
     TokenizeStateSawQuestionMark,
+    TokenizeStateSawAtSign,
     TokenizeStateError,
 };
 
@@ -429,7 +430,7 @@ void tokenize(Buf *buf, Tokenization *out) {
                         break;
                     case '@':
                         begin_token(&t, TokenIdAtSign);
-                        end_token(&t);
+                        t.state = TokenizeStateSawAtSign;
                         break;
                     case '-':
                         begin_token(&t, TokenIdDash);
@@ -858,6 +859,19 @@ void tokenize(Buf *buf, Tokenization *out) {
                         continue;
                 }
                 break;
+            case TokenizeStateSawAtSign:
+                switch (c) {
+                    case '"':
+                        t.cur_tok->id = TokenIdSymbol;
+                        t.state = TokenizeStateString;
+                        break;
+                    default:
+                        t.pos -= 1;
+                        end_token(&t);
+                        t.state = TokenizeStateStart;
+                        continue;
+                }
+                break;
             case TokenizeStateFirstR:
                 switch (c) {
                     case '"':
@@ -1131,6 +1145,7 @@ void tokenize(Buf *buf, Tokenization *out) {
         case TokenizeStateSawGreaterThanGreaterThan:
         case TokenizeStateSawDot:
         case TokenizeStateSawQuestionMark:
+        case TokenizeStateSawAtSign:
             end_token(&t);
             break;
         case TokenizeStateSawDotDot:
test/run_tests.cpp
@@ -1394,6 +1394,14 @@ void foo(void (__cdecl *fn_ptr)(void));
     add_parseh_case("comment after integer literal", R"SOURCE(
 #define SDL_INIT_VIDEO 0x00000020  /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */
     )SOURCE", 1, "pub const SDL_INIT_VIDEO = 32;");
+
+    add_parseh_case("zig keywords in C code", R"SOURCE(
+struct type {
+    int defer;
+};
+    )SOURCE", 2, R"(export struct struct_type {
+    @"defer": c_int,
+})", R"(pub const @"type" = struct_type;)");
 }
 
 static void run_self_hosted_test(void) {
test/self_hosted.zig
@@ -1295,3 +1295,7 @@ struct EmptyStruct {
     #static_eval_enable(false)
     fn method(es: EmptyStruct) -> i32 { 1234 }
 }
+
+
+#attribute("test")
+fn @"weird function name"() { }