Commit 35362f8137
Changed files (5)
src/c_tokenizer.cpp
@@ -0,0 +1,651 @@
+/*
+ * Copyright (c) 2016 Andrew Kelley
+ *
+ * This file is part of zig, which is MIT licensed.
+ * See http://opensource.org/licenses/MIT
+ */
+
+#include "c_tokenizer.hpp"
+#include <inttypes.h>
+
+#define WHITESPACE_EXCEPT_N /* case-label list, written as `case WHITESPACE_EXCEPT_N:`; matches space, tab, vertical tab and form feed, but not newline */ \
+ ' ': \
+ case '\t': \
+ case '\v': \
+ case '\f'
+
+#define DIGIT_NON_ZERO /* case-label list, written as `case DIGIT_NON_ZERO:`; matches the decimal digits 1 through 9 */ \
+ '1': \
+ case '2': \
+ case '3': \
+ case '4': \
+ case '5': \
+ case '6': \
+ case '7': \
+ case '8': \
+ case '9'
+
+#define DIGIT /* case-label list, written as `case DIGIT:`; matches '0' plus everything in DIGIT_NON_ZERO */ \
+ '0': \
+ case DIGIT_NON_ZERO
+
+#define ALPHA /* case-label list, written as `case ALPHA:`; matches the ASCII letters a-z and A-Z */ \
+ 'a': \
+ case 'b': \
+ case 'c': \
+ case 'd': \
+ case 'e': \
+ case 'f': \
+ case 'g': \
+ case 'h': \
+ case 'i': \
+ case 'j': \
+ case 'k': \
+ case 'l': \
+ case 'm': \
+ case 'n': \
+ case 'o': \
+ case 'p': \
+ case 'q': \
+ case 'r': \
+ case 's': \
+ case 't': \
+ case 'u': \
+ case 'v': \
+ case 'w': \
+ case 'x': \
+ case 'y': \
+ case 'z': \
+ case 'A': \
+ case 'B': \
+ case 'C': \
+ case 'D': \
+ case 'E': \
+ case 'F': \
+ case 'G': \
+ case 'H': \
+ case 'I': \
+ case 'J': \
+ case 'K': \
+ case 'L': \
+ case 'M': \
+ case 'N': \
+ case 'O': \
+ case 'P': \
+ case 'Q': \
+ case 'R': \
+ case 'S': \
+ case 'T': \
+ case 'U': \
+ case 'V': \
+ case 'W': \
+ case 'X': \
+ case 'Y': \
+ case 'Z'
+
+#define IDENT_START /* case-label list, written as `case IDENT_START:`; matches every ALPHA character plus underscore */ \
+ ALPHA: \
+ case '_'
+
+#define IDENT /* case-label list, written as `case IDENT:`; matches every IDENT_START character plus the digits in DIGIT */ \
+ IDENT_START: \
+ case DIGIT
+
+
+// Appends a new token of kind `id` to ctok->tokens and makes it the token
+// under construction (ctok->cur_tok). No other token may be open when this
+// is called.
+static void begin_token(CTokenize *ctok, CTokId id) {
+    assert(ctok->cur_tok == nullptr);
+    ctok->tokens.add_one();
+    CTok *tok = &ctok->tokens.last();
+    ctok->cur_tok = tok;
+    tok->id = id;
+
+    // Only string literals and symbols carry a Buf payload. The Buf is zeroed
+    // and then resized to length 0 before any character is appended to it.
+    Buf *text = nullptr;
+    if (id == CTokIdStrLit) {
+        text = &tok->data.str_lit;
+    } else if (id == CTokIdSymbol) {
+        text = &tok->data.symbol;
+    }
+    if (text != nullptr) {
+        memset(text, 0, sizeof(Buf));
+        buf_resize(text, 0);
+    }
+}
+
+static void end_token(CTokenize *ctok) { // Closes the token currently under construction.
+ ctok->cur_tok = nullptr; // begin_token asserts this is null before opening the next token
+}
+
+static void mark_error(CTokenize *ctok) { // Records that the input could not be tokenized.
+ ctok->error = true; // callers of tokenize_c_macro inspect this flag after it returns
+}
+
+// Stores the character produced by an escape sequence into the open token and
+// moves the state machine back to the state that follows a character of that
+// token kind. Only character and string literal tokens are valid here.
+static void add_char(CTokenize *ctok, uint8_t c) {
+    CTok *tok = ctok->cur_tok;
+    assert(tok);
+    switch (tok->id) {
+        case CTokIdCharLit:
+            // A character literal holds exactly one character; the closing
+            // quote must come next.
+            tok->data.char_lit = c;
+            ctok->state = CTokStateExpectEndQuot;
+            return;
+        case CTokIdStrLit:
+            buf_append_char(&tok->data.str_lit, c);
+            ctok->state = CTokStateString;
+            return;
+        default:
+            zig_unreachable();
+    }
+}
+
+// Folds one hexadecimal digit (`value`, expected to be 0..15) into the integer
+// literal being built and appends its lower-case spelling to ctok->buf.
+static void hex_digit(CTokenize *ctok, uint8_t value) {
+    static const uint8_t lower_hex_chars[] = "0123456789abcdef";
+
+    // TODO @mul_with_overflow
+    // TODO @add_with_overflow
+    uint64_t *acc = &ctok->cur_tok->data.num_lit_int;
+    *acc = *acc * 16 + value;
+
+    buf_append_char(&ctok->buf, lower_hex_chars[value]);
+}
+
+// Finishes a floating point literal: converts the text accumulated in
+// ctok->buf with strtod, stores the result in the open token, closes the token
+// and returns the state machine to CTokStateStart.
+static void end_float(CTokenize *ctok) {
+    // TODO detect errors, overflow, and underflow
+    ctok->cur_tok->data.num_lit_float = strtod(buf_ptr(&ctok->buf), nullptr);
+
+    ctok->state = CTokStateStart;
+    end_token(ctok);
+}
+
+// Tokenizes the replacement text of one C macro definition.
+//
+// `c` points at NUL-terminated text. Scanning stops at the terminating NUL, at
+// a newline seen between tokens, or at the newline that ends a `//` comment.
+// A backslash immediately followed by a newline between tokens is treated as
+// a line continuation.
+//
+// On return ctok->error tells whether tokenization failed. When it is false,
+// ctok->tokens holds the tokens in source order. When it is true the function
+// has returned early and the token list must not be used.
+//
+// Recognized tokens: character literals, string literals, identifiers, a
+// standalone minus sign, decimal/octal/hexadecimal integer literals with
+// optional u/U and l/L suffixes, and floating point literals (decimal, or
+// hexadecimal digits followed by a p/P exponent).
+void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) {
+    ctok->tokens.resize(0);
+    ctok->state = CTokStateStart;
+    ctok->error = false;
+    ctok->cur_tok = nullptr;
+
+    buf_resize(&ctok->buf, 0);
+
+    // Several states "un-read" the current character with `c -= 1; continue;`
+    // so that the loop increment presents the same character to the new state.
+    for (; *c; c += 1) {
+        switch (ctok->state) {
+            case CTokStateStart:
+                switch (*c) {
+                    case WHITESPACE_EXCEPT_N:
+                        break;
+                    case '\'':
+                        ctok->state = CTokStateExpectChar;
+                        begin_token(ctok, CTokIdCharLit);
+                        break;
+                    case '\"':
+                        ctok->state = CTokStateString;
+                        begin_token(ctok, CTokIdStrLit);
+                        break;
+                    case '/':
+                        ctok->state = CTokStateOpenComment;
+                        break;
+                    case '\\':
+                        ctok->state = CTokStateBackslash;
+                        break;
+                    case '\n':
+                        goto found_end_of_macro;
+                    case IDENT_START:
+                        ctok->state = CTokStateIdentifier;
+                        begin_token(ctok, CTokIdSymbol);
+                        buf_append_char(&ctok->cur_tok->data.symbol, *c);
+                        break;
+                    case DIGIT_NON_ZERO:
+                        ctok->state = CTokStateDecimal;
+                        ctok->unsigned_suffix = false;
+                        ctok->long_suffix = false;
+                        begin_token(ctok, CTokIdNumLitInt);
+                        ctok->cur_tok->data.num_lit_int = *c - '0';
+                        buf_resize(&ctok->buf, 0);
+                        buf_append_char(&ctok->buf, *c);
+                        break;
+                    case '0':
+                        ctok->state = CTokStateGotZero;
+                        ctok->unsigned_suffix = false;
+                        ctok->long_suffix = false;
+                        begin_token(ctok, CTokIdNumLitInt);
+                        ctok->cur_tok->data.num_lit_int = 0;
+                        buf_resize(&ctok->buf, 0);
+                        buf_append_char(&ctok->buf, '0');
+                        break;
+                    case '.':
+                        begin_token(ctok, CTokIdNumLitFloat);
+                        ctok->state = CTokStateFloat;
+                        buf_init_from_str(&ctok->buf, "0.");
+                        break;
+                    case '-':
+                        // A minus sign is a complete, payload-free token.
+                        begin_token(ctok, CTokIdMinus);
+                        end_token(ctok);
+                        break;
+                    default:
+                        return mark_error(ctok);
+                }
+                break;
+            case CTokStateFloat:
+                switch (*c) {
+                    case 'e':
+                    case 'E':
+                        buf_append_char(&ctok->buf, 'e');
+                        ctok->state = CTokStateExpSign;
+                        break;
+                    case 'f':
+                    case 'F':
+                    case 'l':
+                    case 'L':
+                        end_float(ctok);
+                        break;
+                    case DIGIT:
+                        buf_append_char(&ctok->buf, *c);
+                        break;
+                    default:
+                        c -= 1;
+                        end_float(ctok);
+                        continue;
+                }
+                break;
+            case CTokStateExpSign:
+                switch (*c) {
+                    case '+':
+                    case '-':
+                        ctok->state = CTokStateFloatExpFirst;
+                        buf_append_char(&ctok->buf, *c);
+                        break;
+                    case DIGIT:
+                        ctok->state = CTokStateFloatExp;
+                        buf_append_char(&ctok->buf, *c);
+                        break;
+                    default:
+                        return mark_error(ctok);
+                }
+                break;
+            case CTokStateFloatExpFirst:
+                switch (*c) {
+                    case DIGIT:
+                        buf_append_char(&ctok->buf, *c);
+                        ctok->state = CTokStateFloatExp;
+                        break;
+                    default:
+                        return mark_error(ctok);
+                }
+                break;
+            case CTokStateFloatExp:
+                switch (*c) {
+                    case DIGIT:
+                        buf_append_char(&ctok->buf, *c);
+                        break;
+                    case 'f':
+                    case 'F':
+                    case 'l':
+                    case 'L':
+                        end_float(ctok);
+                        break;
+                    default:
+                        c -= 1;
+                        end_float(ctok);
+                        continue;
+                }
+                break;
+            case CTokStateDecimal:
+                switch (*c) {
+                    case DIGIT:
+                        buf_append_char(&ctok->buf, *c);
+
+                        // TODO @mul_with_overflow
+                        ctok->cur_tok->data.num_lit_int *= 10;
+                        // TODO @add_with_overflow
+                        ctok->cur_tok->data.num_lit_int += *c - '0';
+                        break;
+                    case '\'':
+                        // digit separator: skipped
+                        break;
+                    case 'u':
+                    case 'U':
+                        ctok->unsigned_suffix = true;
+                        ctok->state = CTokStateIntSuffix;
+                        break;
+                    case 'l':
+                    case 'L':
+                        ctok->long_suffix = true;
+                        ctok->state = CTokStateIntSuffixLong;
+                        break;
+                    case '.':
+                        buf_append_char(&ctok->buf, '.');
+                        ctok->cur_tok->id = CTokIdNumLitFloat;
+                        ctok->state = CTokStateFloat;
+                        break;
+                    default:
+                        c -= 1;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        continue;
+                }
+                break;
+            case CTokStateIntSuffix:
+                switch (*c) {
+                    case 'l':
+                    case 'L':
+                        if (ctok->long_suffix) {
+                            return mark_error(ctok);
+                        }
+                        ctok->long_suffix = true;
+                        ctok->state = CTokStateIntSuffixLong;
+                        break;
+                    case 'u':
+                    case 'U':
+                        if (ctok->unsigned_suffix) {
+                            return mark_error(ctok);
+                        }
+                        ctok->unsigned_suffix = true;
+                        break;
+                    default:
+                        c -= 1;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        continue;
+                }
+                break;
+            case CTokStateIntSuffixLong:
+                switch (*c) {
+                    case 'l':
+                    case 'L':
+                        // second 'l' of an "ll" suffix
+                        ctok->state = CTokStateIntSuffix;
+                        break;
+                    case 'u':
+                    case 'U':
+                        if (ctok->unsigned_suffix) {
+                            return mark_error(ctok);
+                        }
+                        ctok->unsigned_suffix = true;
+                        break;
+                    default:
+                        c -= 1;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        continue;
+                }
+                break;
+            case CTokStateGotZero:
+                switch (*c) {
+                    case 'x':
+                    case 'X':
+                        // Keep the prefix in the text buffer so that a
+                        // hexadecimal float ("0x1p3") reaches strtod intact.
+                        buf_append_char(&ctok->buf, 'x');
+                        ctok->state = CTokStateHex;
+                        break;
+                    case '.':
+                        ctok->state = CTokStateFloat;
+                        ctok->cur_tok->id = CTokIdNumLitFloat;
+                        buf_append_char(&ctok->buf, '.');
+                        break;
+                    default:
+                        c -= 1;
+                        ctok->state = CTokStateOctal;
+                        continue;
+                }
+                break;
+            case CTokStateOctal:
+                switch (*c) {
+                    case '0':
+                    case '1':
+                    case '2':
+                    case '3':
+                    case '4':
+                    case '5':
+                    case '6':
+                    case '7':
+                        // TODO @mul_with_overflow
+                        ctok->cur_tok->data.num_lit_int *= 8;
+                        // TODO @add_with_overflow
+                        ctok->cur_tok->data.num_lit_int += *c - '0';
+                        break;
+                    case '8':
+                    case '9':
+                        return mark_error(ctok);
+                    case '\'':
+                        // digit separator: skipped
+                        break;
+                    case 'u':
+                    case 'U':
+                        // Same suffix handling as decimal literals, so that
+                        // "0u" and "017UL" stay a single integer token.
+                        ctok->unsigned_suffix = true;
+                        ctok->state = CTokStateIntSuffix;
+                        break;
+                    case 'l':
+                    case 'L':
+                        ctok->long_suffix = true;
+                        ctok->state = CTokStateIntSuffixLong;
+                        break;
+                    default:
+                        c -= 1;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        continue;
+                }
+                break;
+            case CTokStateHex:
+                switch (*c) {
+                    case '0':
+                        hex_digit(ctok, 0);
+                        break;
+                    case '1':
+                        hex_digit(ctok, 1);
+                        break;
+                    case '2':
+                        hex_digit(ctok, 2);
+                        break;
+                    case '3':
+                        hex_digit(ctok, 3);
+                        break;
+                    case '4':
+                        hex_digit(ctok, 4);
+                        break;
+                    case '5':
+                        hex_digit(ctok, 5);
+                        break;
+                    case '6':
+                        hex_digit(ctok, 6);
+                        break;
+                    case '7':
+                        hex_digit(ctok, 7);
+                        break;
+                    case '8':
+                        hex_digit(ctok, 8);
+                        break;
+                    case '9':
+                        hex_digit(ctok, 9);
+                        break;
+                    case 'a':
+                    case 'A':
+                        hex_digit(ctok, 10);
+                        break;
+                    case 'b':
+                    case 'B':
+                        hex_digit(ctok, 11);
+                        break;
+                    case 'c':
+                    case 'C':
+                        hex_digit(ctok, 12);
+                        break;
+                    case 'd':
+                    case 'D':
+                        hex_digit(ctok, 13);
+                        break;
+                    case 'e':
+                    case 'E':
+                        hex_digit(ctok, 14);
+                        break;
+                    case 'f':
+                    case 'F':
+                        hex_digit(ctok, 15);
+                        break;
+                    case 'p':
+                    case 'P':
+                        // Binary exponent of a hexadecimal float. The marker
+                        // must be in the buffer for strtod to see it.
+                        buf_append_char(&ctok->buf, 'p');
+                        ctok->cur_tok->id = CTokIdNumLitFloat;
+                        ctok->state = CTokStateExpSign;
+                        break;
+                    case 'u':
+                    case 'U':
+                        // Same suffix handling as decimal literals, so that
+                        // "0x10u" stays a single integer token.
+                        ctok->unsigned_suffix = true;
+                        ctok->state = CTokStateIntSuffix;
+                        break;
+                    case 'l':
+                    case 'L':
+                        ctok->long_suffix = true;
+                        ctok->state = CTokStateIntSuffixLong;
+                        break;
+                    default:
+                        c -= 1;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        continue;
+                }
+                break;
+            case CTokStateIdentifier:
+                switch (*c) {
+                    case IDENT:
+                        buf_append_char(&ctok->cur_tok->data.symbol, *c);
+                        break;
+                    default:
+                        c -= 1;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        continue;
+                }
+                break;
+            case CTokStateString:
+                switch (*c) {
+                    case '\\':
+                        ctok->state = CTokStateCharEscape;
+                        break;
+                    case '\"':
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        break;
+                    default:
+                        buf_append_char(&ctok->cur_tok->data.str_lit, *c);
+                }
+                break;
+            case CTokStateExpectChar:
+                switch (*c) {
+                    case '\\':
+                        ctok->state = CTokStateCharEscape;
+                        break;
+                    case '\'':
+                        // empty character literal
+                        return mark_error(ctok);
+                    default:
+                        ctok->cur_tok->data.char_lit = *c;
+                        ctok->state = CTokStateExpectEndQuot;
+                }
+                break;
+            case CTokStateCharEscape:
+                // add_char stores the character and selects the next state
+                // according to the kind of the open token.
+                switch (*c) {
+                    case '\'':
+                    case '"':
+                    case '?':
+                    case '\\':
+                        add_char(ctok, *c);
+                        break;
+                    case 'a':
+                        add_char(ctok, '\a');
+                        break;
+                    case 'b':
+                        add_char(ctok, '\b');
+                        break;
+                    case 'f':
+                        add_char(ctok, '\f');
+                        break;
+                    case 'n':
+                        add_char(ctok, '\n');
+                        break;
+                    case 'r':
+                        add_char(ctok, '\r');
+                        break;
+                    case 't':
+                        add_char(ctok, '\t');
+                        break;
+                    case 'v':
+                        add_char(ctok, '\v');
+                        break;
+                    case DIGIT:
+                        zig_panic("TODO octal");
+                        break;
+                    case 'x':
+                        zig_panic("TODO hex");
+                        break;
+                    case 'u':
+                        zig_panic("TODO unicode");
+                        break;
+                    case 'U':
+                        zig_panic("TODO Unicode");
+                        break;
+                    default:
+                        return mark_error(ctok);
+                }
+                break;
+            case CTokStateExpectEndQuot:
+                switch (*c) {
+                    case '\'':
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        break;
+                    default:
+                        return mark_error(ctok);
+                }
+                break;
+            case CTokStateOpenComment:
+                switch (*c) {
+                    case '/':
+                        ctok->state = CTokStateLineComment;
+                        break;
+                    case '*':
+                        ctok->state = CTokStateComment;
+                        break;
+                    default:
+                        return mark_error(ctok);
+                }
+                break;
+            case CTokStateLineComment:
+                if (*c == '\n') {
+                    ctok->state = CTokStateStart;
+                    goto found_end_of_macro;
+                }
+                break;
+            case CTokStateComment:
+                switch (*c) {
+                    case '*':
+                        ctok->state = CTokStateCommentStar;
+                        break;
+                    default:
+                        break;
+                }
+                break;
+            case CTokStateCommentStar:
+                switch (*c) {
+                    case '/':
+                        ctok->state = CTokStateStart;
+                        break;
+                    case '*':
+                        break;
+                    default:
+                        ctok->state = CTokStateComment;
+                        break;
+                }
+                break;
+            case CTokStateBackslash:
+                switch (*c) {
+                    case '\n':
+                        ctok->state = CTokStateStart;
+                        break;
+                    default:
+                        return mark_error(ctok);
+                }
+                break;
+        }
+    }
+found_end_of_macro:
+
+    // Decide what the state at end of input means: nothing pending, a token
+    // that is complete but still open, or an unterminated construct.
+    switch (ctok->state) {
+        case CTokStateStart:
+        case CTokStateLineComment:
+            // A `//` comment may run to the end of the input without a
+            // trailing newline; no token is open in that state.
+            break;
+        case CTokStateIdentifier:
+        case CTokStateDecimal:
+        case CTokStateHex:
+        case CTokStateOctal:
+        case CTokStateGotZero:
+        case CTokStateIntSuffix:
+        case CTokStateIntSuffixLong:
+            end_token(ctok);
+            break;
+        case CTokStateFloat:
+        case CTokStateFloatExp:
+            end_float(ctok);
+            break;
+        case CTokStateExpectChar:
+        case CTokStateExpectEndQuot:
+        case CTokStateOpenComment:
+        case CTokStateComment:
+        case CTokStateCommentStar:
+        case CTokStateCharEscape:
+        case CTokStateBackslash:
+        case CTokStateString:
+        case CTokStateExpSign:
+        case CTokStateFloatExpFirst:
+            return mark_error(ctok);
+    }
+
+    assert(ctok->cur_tok == nullptr);
+}
src/c_tokenizer.hpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2016 Andrew Kelley
+ *
+ * This file is part of zig, which is MIT licensed.
+ * See http://opensource.org/licenses/MIT
+ */
+
+
+#ifndef ZIG_C_TOKENIZER_HPP
+#define ZIG_C_TOKENIZER_HPP
+
+#include "buffer.hpp"
+
+enum CTokId { // Kind of a token produced for a C macro body; selects the valid member of CTok::data.
+ CTokIdCharLit, // character literal; value in data.char_lit
+ CTokIdStrLit, // string literal; contents in data.str_lit
+ CTokIdNumLitInt, // integer literal; value in data.num_lit_int
+ CTokIdNumLitFloat, // floating point literal; value in data.num_lit_float
+ CTokIdSymbol, // identifier; spelling in data.symbol
+ CTokIdMinus, // minus sign; carries no payload
+};
+
+struct CTok { // One token of a C macro body.
+ enum CTokId id; // token kind; determines which union member below is meaningful
+ union {
+ uint8_t char_lit; // CTokIdCharLit
+ Buf str_lit; // CTokIdStrLit
+ uint64_t num_lit_int; // CTokIdNumLitInt
+ double num_lit_float; // CTokIdNumLitFloat
+ Buf symbol; // CTokIdSymbol
+ } data;
+};
+
+enum CTokState { // States of the tokenize_c_macro state machine.
+ CTokStateStart, // between tokens
+ CTokStateExpectChar, // after the opening quote of a character literal
+ CTokStateCharEscape, // after a backslash inside a character or string literal
+ CTokStateExpectEndQuot, // character literal value stored; closing quote must follow
+ CTokStateOpenComment, // after a slash; a second slash or a star must follow
+ CTokStateLineComment, // inside a comment that runs to the end of the line
+ CTokStateComment, // inside a block comment
+ CTokStateCommentStar, // inside a block comment, just after a star
+ CTokStateBackslash, // after a backslash between tokens; a newline must follow
+ CTokStateString, // inside a string literal
+ CTokStateIdentifier, // inside an identifier
+ CTokStateDecimal, // inside a decimal integer literal
+ CTokStateOctal, // inside an octal integer literal
+ CTokStateGotZero, // after a leading 0; decides between hex, float and octal
+ CTokStateHex, // inside a hexadecimal literal, after the x
+ CTokStateIntSuffix, // inside the u/l suffix of an integer literal
+ CTokStateIntSuffixLong, // inside an integer suffix, just after a single l
+ CTokStateFloat, // inside the fractional digits of a float literal
+ CTokStateExpSign, // after the exponent marker; a sign or a digit must follow
+ CTokStateFloatExp, // inside the exponent digits of a float literal
+ CTokStateFloatExpFirst, // after the exponent sign; a digit must follow
+};
+
+struct CTokenize { // Tokenizer state; tokens, state, error, cur_tok and buf are reset at the start of every tokenize_c_macro() call.
+ ZigList<CTok> tokens; // tokens produced by the most recent call
+ CTokState state; // current state of the state machine
+ bool error; // true when the most recent input could not be tokenized
+ CTok *cur_tok; // token under construction, or nullptr between tokens
+ Buf buf; // text of the numeric literal in progress; converted with strtod for floats
+ bool unsigned_suffix; // a u/U suffix has been seen on the current integer literal
+ bool long_suffix; // an l/L suffix has been seen on the current integer literal
+};
+
+void tokenize_c_macro(CTokenize *ctok, const uint8_t *c); // Tokenizes the NUL-terminated macro text at `c` into ctok->tokens; sets ctok->error on failure.
+
+#endif
src/parseh.cpp
@@ -12,6 +12,7 @@
#include "parser.hpp"
#include "all_types.hpp"
#include "tokenizer.hpp"
+#include "c_tokenizer.hpp"
#include "analyze.hpp"
#include <clang/Frontend/ASTUnit.h>
@@ -176,6 +177,19 @@ static AstNode *create_str_lit_node(Context *c, Buf *buf) {
return node;
}
+// Creates a number literal AST node of kind NumLitFloat whose value is `x`.
+static AstNode *create_num_lit_float(Context *c, double x) {
+    AstNode *float_node = create_node(c, NodeTypeNumberLiteral);
+    float_node->data.number_literal.data.x_float = x;
+    float_node->data.number_literal.kind = NumLitFloat;
+    return float_node;
+}
+
+// Creates a float literal node for `x`; when `negative` is set the literal is
+// wrapped in a PrefixOpNegation node and that wrapper is returned instead.
+static AstNode *create_num_lit_float_negative(Context *c, double x, bool negative) {
+    AstNode *literal = create_num_lit_float(c, x);
+    if (negative) {
+        return create_prefix_node(c, PrefixOpNegation, literal);
+    }
+    return literal;
+}
+
static AstNode *create_num_lit_unsigned(Context *c, uint64_t x) {
AstNode *node = create_node(c, NodeTypeNumberLiteral);
node->data.number_literal.kind = NumLitUInt;
@@ -183,6 +197,12 @@ static AstNode *create_num_lit_unsigned(Context *c, uint64_t x) {
return node;
}
+// Creates an unsigned integer literal node for `x`; when `negative` is set the
+// literal is wrapped in a PrefixOpNegation node and that wrapper is returned.
+static AstNode *create_num_lit_unsigned_negative(Context *c, uint64_t x, bool negative) {
+    AstNode *literal = create_num_lit_unsigned(c, x);
+    if (negative) {
+        return create_prefix_node(c, PrefixOpNegation, literal);
+    }
+    return literal;
+}
+
static AstNode *create_num_lit_signed(Context *c, int64_t x) {
if (x >= 0) {
return create_num_lit_unsigned(c, x);
@@ -1244,209 +1264,70 @@ static void render_macros(Context *c) {
}
}
-static int parse_c_char_lit(Buf *value, uint8_t *out_c) {
- enum State {
- StateExpectStartQuot,
- StateExpectChar,
- StateExpectEndQuot,
- StateExpectEnd,
- };
- State state = StateExpectStartQuot;
- for (int i = 0; i < buf_len(value); i += 1) {
- uint8_t c = buf_ptr(value)[i];
- switch (state) {
- case StateExpectStartQuot:
- switch (c) {
- case '\'':
- state = StateExpectChar;
- break;
- default:
- return -1;
- }
- break;
- case StateExpectChar:
- switch (c) {
- case '\\':
- case '\'':
- return -1;
- default:
- *out_c = c;
- state = StateExpectEndQuot;
- }
- break;
- case StateExpectEndQuot:
- switch (c) {
- case '\'':
- state = StateExpectEnd;
- break;
- default:
- return -1;
- }
- break;
- case StateExpectEnd:
- return -1;
- }
- }
- return (state == StateExpectEnd) ? 0 : -1;
-}
-
-static int parse_c_num_lit_unsigned(Buf *buf, uint64_t *out_val) {
- char *temp;
- *out_val = strtoull(buf_ptr(buf), &temp, 0);
-
- if (temp == buf_ptr(buf) || *temp != 0 || *out_val == ULLONG_MAX) {
- return -1;
- }
-
- return 0;
-}
-
-static bool is_simple_symbol(Buf *buf) {
- bool first = true;
- for (int i = 0; i < buf_len(buf); i += 1) {
- uint8_t c = buf_ptr(buf)[i];
- bool valid_alpha = (c >= 'a' && c <= 'z') ||
- (c >= 'A' && c <= 'Z') || c == '_';
- bool valid_digit = (c >= '0' && c <= '9');
-
- bool ok = (valid_alpha || (!first && valid_digit));
- first = false;
-
- if (!ok) {
- return false;
- }
- }
- return true;
-}
-
-enum ParseCStrState {
- ParseCStrStateExpectQuot,
- ParseCStrStateNormal,
- ParseCStrStateEscape,
-};
-
-static int parse_c_str_lit(Buf *buf, Buf *out_str) {
- ParseCStrState state = ParseCStrStateExpectQuot;
- buf_resize(out_str, 0);
-
- for (int i = 0; i < buf_len(buf); i += 1) {
- uint8_t c = buf_ptr(buf)[i];
- switch (state) {
- case ParseCStrStateExpectQuot:
- if (c == '"') {
- state = ParseCStrStateNormal;
- } else {
- return -1;
- }
- break;
- case ParseCStrStateNormal:
- switch (c) {
- case '\\':
- state = ParseCStrStateEscape;
- break;
- case '\n':
- return -1;
- case '"':
- return 0;
- default:
- buf_append_char(out_str, c);
- }
- break;
- case ParseCStrStateEscape:
- switch (c) {
- case '\'':
- buf_append_char(out_str, '\'');
- state = ParseCStrStateNormal;
- break;
- case '"':
- buf_append_char(out_str, '"');
- state = ParseCStrStateNormal;
- break;
- case '?':
- buf_append_char(out_str, '\?');
- state = ParseCStrStateNormal;
- break;
- case '\\':
- buf_append_char(out_str, '\\');
- state = ParseCStrStateNormal;
- break;
- case 'a':
- buf_append_char(out_str, '\a');
- state = ParseCStrStateNormal;
- break;
- case 'b':
- buf_append_char(out_str, '\b');
- state = ParseCStrStateNormal;
- break;
- case 'f':
- buf_append_char(out_str, '\f');
- state = ParseCStrStateNormal;
- break;
- case 'n':
- buf_append_char(out_str, '\n');
- state = ParseCStrStateNormal;
- break;
- case 'r':
- buf_append_char(out_str, '\r');
- state = ParseCStrStateNormal;
- break;
- case 't':
- buf_append_char(out_str, '\t');
- state = ParseCStrStateNormal;
- break;
- case 'v':
- buf_append_char(out_str, '\v');
- state = ParseCStrStateNormal;
- break;
- default:
- // TODO octal escape sequence, hexadecimal escape sequence, and
- // universal character name
- return -1;
- }
- break;
- }
- }
-
- return -1;
-}
-
-static void process_macro(Context *c, Buf *name, Buf *value) {
- //fprintf(stderr, "macro '%s' = '%s'\n", buf_ptr(name), buf_ptr(value));
+// Translates one C macro definition into a zig declaration when its body is
+// simple enough. `name` is the macro name and `char_ptr` points at the
+// NUL-terminated source text of the macro body, which is tokenized into
+// `ctok`. Supported bodies: a single character literal, a single string
+// literal, a single symbol (recorded in c->macro_symbols unless it equals
+// `name`), or a number literal optionally preceded by one minus sign.
+// Anything else, including text that fails to tokenize, is ignored.
+static void process_macro(Context *c, CTokenize *ctok, Buf *name, const char *char_ptr) {
if (is_zig_keyword(name)) {
return;
}
- // maybe it's a character literal
- uint8_t ch;
- if (!parse_c_char_lit(value, &ch)) {
- AstNode *var_node = create_var_decl_node(c, buf_ptr(name), create_char_lit_node(c, ch));
- c->macro_table.put(name, var_node);
- return;
- }
- // maybe it's a string literal
- Buf str_lit = BUF_INIT;
- if (!parse_c_str_lit(value, &str_lit)) {
- AstNode *var_node = create_var_decl_node(c, buf_ptr(name), create_str_lit_node(c, &str_lit));
- c->macro_table.put(name, var_node);
- return;
- }
+ tokenize_c_macro(ctok, (const uint8_t *)char_ptr);
- // maybe it's an unsigned integer
- uint64_t uint;
- if (!parse_c_num_lit_unsigned(value, &uint)) {
- AstNode *var_node = create_var_decl_node(c, buf_ptr(name), create_num_lit_unsigned(c, uint));
- c->macro_table.put(name, var_node);
+ if (ctok->error) {
return;
}
- // maybe it's a symbol
- if (is_simple_symbol(value)) {
- // if it equals itself, ignore. for example, from stdio.h:
- // #define stdin stdin
- if (buf_eql_buf(name, value)) {
- return;
+ // set when the first token is a minus sign; applied to a following number literal
+ bool negate = false;
+ for (int i = 0; i < ctok->tokens.length; i += 1) {
+ bool is_first = (i == 0);
+ bool is_last = (i == ctok->tokens.length - 1);
+ CTok *tok = &ctok->tokens.at(i);
+ switch (tok->id) {
+ case CTokIdCharLit:
+ if (is_last && is_first) {
+ AstNode *var_node = create_var_decl_node(c, buf_ptr(name),
+ create_char_lit_node(c, tok->data.char_lit));
+ c->macro_table.put(name, var_node);
+ }
+ return;
+ case CTokIdStrLit:
+ if (is_last && is_first) {
+ AstNode *var_node = create_var_decl_node(c, buf_ptr(name),
+ create_str_lit_node(c, &tok->data.str_lit));
+ c->macro_table.put(name, var_node);
+ }
+ return;
+ case CTokIdNumLitInt:
+ if (is_last) {
+ AstNode *var_node = create_var_decl_node(c, buf_ptr(name),
+ create_num_lit_unsigned_negative(c, tok->data.num_lit_int, negate));
+ c->macro_table.put(name, var_node);
+ }
+ return;
+ case CTokIdNumLitFloat:
+ if (is_last) {
+ AstNode *var_node = create_var_decl_node(c, buf_ptr(name),
+ create_num_lit_float_negative(c, tok->data.num_lit_float, negate));
+ c->macro_table.put(name, var_node);
+ }
+ return;
+ case CTokIdSymbol:
+ if (is_last && is_first) {
+ // if it equals itself, ignore. for example, from stdio.h:
+ // #define stdin stdin
+ Buf *symbol_name = buf_create_from_buf(&tok->data.symbol);
+ if (buf_eql_buf(name, symbol_name)) {
+ return;
+ }
+ c->macro_symbols.append({name, symbol_name});
+ return;
+ }
+ // A symbol that is part of a longer expression is not supported.
+ // Return here instead of falling into the minus case, which would
+ // treat a leading symbol as a negation sign.
+ return;
+ case CTokIdMinus:
+ if (is_first) {
+ negate = true;
+ break;
+ } else {
+ return;
+ }
+ }
- c->macro_symbols.append({name, value});
}
}
@@ -1473,6 +1354,8 @@ static void process_symbol_macros(Context *c) {
}
static void process_preprocessor_entities(Context *c, ASTUnit &unit) {
+ CTokenize ctok = {{0}};
+
for (PreprocessedEntity *entity : unit.getLocalPreprocessingEntities()) {
switch (entity->getKind()) {
case PreprocessedEntity::InvalidKind:
@@ -1494,16 +1377,7 @@ static void process_preprocessor_entities(Context *c, ASTUnit &unit) {
}
const char *end_c = c->source_manager->getCharacterData(end_loc);
- Buf *value = buf_alloc();
- while (*end_c && *end_c != '\n') {
- buf_append_char(value, *end_c);
- if (end_c[0] == '\\' && end_c[1] == '\n') {
- end_c += 2;
- } else {
- end_c += 1;
- }
- }
- process_macro(c, buf_create_from_str(name), value);
+ process_macro(c, &ctok, buf_create_from_str(name), end_c);
}
}
}
test/run_tests.cpp
@@ -1390,6 +1390,10 @@ extern void (*fn_ptr)(void);
add_parseh_case("__cdecl doesn't mess up function pointers", R"SOURCE(
void foo(void (__cdecl *fn_ptr)(void));
)SOURCE", 1, "pub extern fn foo(fn_ptr: ?extern fn());");
+
+ add_parseh_case("comment after integer literal", R"SOURCE(
+#define SDL_INIT_VIDEO 0x00000020 /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */
+ )SOURCE", 1, "pub const SDL_INIT_VIDEO = 32;");
}
static void run_self_hosted_test(void) {
CMakeLists.txt
@@ -40,6 +40,7 @@ set(ZIG_SOURCES
"${CMAKE_SOURCE_DIR}/src/ast_render.cpp"
"${CMAKE_SOURCE_DIR}/src/bignum.cpp"
"${CMAKE_SOURCE_DIR}/src/tokenizer.cpp"
+ "${CMAKE_SOURCE_DIR}/src/c_tokenizer.cpp"
"${CMAKE_SOURCE_DIR}/src/parser.cpp"
"${CMAKE_SOURCE_DIR}/src/eval.cpp"
"${CMAKE_SOURCE_DIR}/src/analyze.cpp"