Commit 5f48463bdd
Changed files (2)
src/buffer.hpp
@@ -73,18 +73,48 @@ static inline Buf *buf_slice(Buf *in_buf, int start, int end) {
return out_buf;
}
-static inline void buf_append_str(Buf *buf, const char *str, int str_len) {
- if (str_len == -1)
- str_len = strlen(str);
-
+static inline void buf_append_mem(Buf *buf, const char *mem, int mem_len) {
+ assert(mem_len >= 0);
int old_len = buf_len(buf);
- buf_resize(buf, old_len + str_len);
- memcpy(buf_ptr(buf) + old_len, str, str_len);
+ buf_resize(buf, old_len + mem_len);
+ memcpy(buf_ptr(buf) + old_len, mem, mem_len);
buf->list.at(buf_len(buf)) = 0;
}
+static inline void buf_append_str(Buf *buf, const char *str) {
+ buf_append_mem(buf, str, strlen(str));
+}
+
static inline void buf_append_buf(Buf *buf, Buf *append_buf) {
- buf_append_str(buf, buf_ptr(append_buf), buf_len(append_buf));
+ buf_append_mem(buf, buf_ptr(append_buf), buf_len(append_buf));
+}
+
+static inline void buf_append_char(Buf *buf, uint8_t c) {
+ buf_append_mem(buf, (const char *)&c, 1);
+}
+
+static inline bool buf_eql_mem(Buf *buf, const char *mem, int mem_len) {
+ if (buf_len(buf) != mem_len)
+ return false;
+ return memcmp(buf_ptr(buf), mem, mem_len) == 0;
+}
+
+static inline bool buf_eql_str(Buf *buf, const char *str) {
+ return buf_eql_mem(buf, str, strlen(str));
+}
+
+static inline bool buf_eql_buf(Buf *buf, Buf *other) {
+ return buf_eql_mem(buf, buf_ptr(other), buf_len(other));
+}
+
+static inline void buf_splice_buf(Buf *buf, int start, int end, Buf *other) {
+ if (start != end)
+ zig_panic("TODO buf_splice_buf");
+
+ int old_buf_len = buf_len(buf);
+ buf_resize(buf, old_buf_len + buf_len(other));
+ memmove(buf_ptr(buf) + start + buf_len(other), buf_ptr(buf) + start, old_buf_len - start);
+ memcpy(buf_ptr(buf) + start, buf_ptr(other), buf_len(other));
}
// TODO this method needs work
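
For orientation, here is a minimal usage sketch of the new Buf helpers (hypothetical, not part of the commit); it assumes the rest of buffer.hpp's API (buf_alloc, buf_ptr, buf_len) behaves as the hunk above suggests:

    // Hypothetical example, not in the commit: exercises the helpers added above.
    #include "buffer.hpp"
    #include <cstdio>

    int main(void) {
        Buf *s = buf_alloc();
        buf_append_str(s, "hello");        // length now taken via strlen internally
        buf_append_char(s, ' ');           // single byte, routed through buf_append_mem
        buf_append_str(s, "world");

        Buf *ins = buf_alloc();
        buf_append_str(ins, "brave ");
        buf_splice_buf(s, 6, 6, ins);      // start == end: pure insertion, the only case implemented so far

        printf("%s\n", buf_ptr(s));                          // hello brave world
        printf("%d\n", buf_eql_str(s, "hello brave world")); // 1
        return 0;
    }
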
src/main.cpp
@@ -154,18 +154,29 @@ struct Token {
enum TokenizeState {
TokenizeStateStart,
TokenizeStateDirective,
+ TokenizeStateDirectiveName,
+ TokenizeStateIncludeQuote,
+ TokenizeStateDirectiveEnd,
+ TokenizeStateInclude,
TokenizeStateSymbol,
TokenizeStateString,
TokenizeStateNumber,
};
struct Tokenize {
+ Buf *buf;
int pos;
TokenizeState state;
ZigList<Token> *tokens;
int line;
int column;
Token *cur_tok;
+ Buf *directive_name;
+ Buf *cur_dir_path;
+ uint8_t unquote_char;
+ int quote_start_pos;
+ Buf *include_path;
+ ZigList<char *> *include_paths;
};
__attribute__ ((format (printf, 2, 3)))
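
Between these two hunks it helps to trace the added directive states. For a hypothetical input line `#include "foo.h"` followed by a newline, the tokenizer in the hunk below moves through the new states like this (trace reconstructed from the added code, in comment form):

    // Hypothetical trace, not part of the commit, for the input:  #include "foo.h"\n
    //
    //   '#'        begin_directive()     -> TokenizeStateDirective
    //   'i'..'e'   SYMBOL_CHAR           -> TokenizeStateDirectiveName, chars collected in directive_name
    //   ' '        end_directive_name()  -> name is "include", so TokenizeStateInclude
    //   '"'        quote recorded        -> TokenizeStateIncludeQuote (unquote_char = '"', quote_start_pos = pos)
    //   'foo.h'    path characters       -> stays in TokenizeStateIncludeQuote
    //   '"'        include_path sliced   -> TokenizeStateDirectiveEnd
    //   '\n'       end_directive()       -> render_include() splices foo.h into t.buf, back to TokenizeStateStart
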
@@ -210,24 +221,72 @@ static void put_back(Tokenize *t, int count) {
t->pos -= count;
}
+static void begin_directive(Tokenize *t) {
+ t->state = TokenizeStateDirective;
+ begin_token(t, TokenIdDirective);
+ assert(!t->directive_name);
+ t->directive_name = buf_alloc();
+}
+
+static bool find_and_include_file(Tokenize *t, char *dir_path, char *file_path) {
+ Buf *full_path = buf_sprintf("%s/%s", dir_path, file_path);
+
+ FILE *f = fopen(buf_ptr(full_path), "rb");
+ if (!f)
+ return false;
+
+ Buf *contents = fetch_file(f);
+
+ buf_splice_buf(t->buf, t->pos, t->pos, contents);
+
+ return true;
+}
+
+static void render_include(Tokenize *t, Buf *target_path, char unquote_char) {
+ if (unquote_char == '"') {
+ if (find_and_include_file(t, buf_ptr(t->cur_dir_path), buf_ptr(target_path)))
+ return;
+ }
+ for (int i = 0; i < t->include_paths->length; i += 1) {
+ char *include_path = t->include_paths->at(i);
+ if (find_and_include_file(t, include_path, buf_ptr(target_path)))
+ return;
+ }
+ tokenize_error(t, "include path \"%s\" not found", buf_ptr(target_path));
+}
+
static void end_directive(Tokenize *t) {
- assert(t->cur_tok);
- t->cur_tok->end_pos = t->pos;
- t->cur_tok = nullptr;
+ end_token(t);
+ if (t->include_path) {
+ render_include(t, t->include_path, t->unquote_char);
+ t->include_path = nullptr;
+ }
t->state = TokenizeStateStart;
}
+static void end_directive_name(Tokenize *t) {
+ if (buf_eql_str(t->directive_name, "include")) {
+ t->state = TokenizeStateInclude;
+ t->directive_name = nullptr;
+ } else {
+ tokenize_error(t, "invalid directive name: \"%s\"", buf_ptr(t->directive_name));
+ }
+}
+
static void end_symbol(Tokenize *t) {
put_back(t, 1);
end_token(t);
t->state = TokenizeStateStart;
}
-static ZigList<Token> *tokenize(Buf *buf) {
+static ZigList<Token> *tokenize(Buf *buf, ZigList<char *> *include_paths, Buf *cur_dir_path) {
Tokenize t = {0};
t.tokens = allocate<ZigList<Token>>(1);
- for (t.pos = 0; t.pos < buf_len(buf); t.pos += 1) {
- uint8_t c = buf_ptr(buf)[t.pos];
+ t.buf = buf;
+ t.cur_dir_path = cur_dir_path;
+ t.include_paths = include_paths;
+ for (t.pos = 0; t.pos < buf_len(t.buf); t.pos += 1) {
+ uint8_t c = buf_ptr(t.buf)[t.pos];
switch (t.state) {
case TokenizeStateStart:
switch (c) {
@@ -242,8 +301,11 @@ static ZigList<Token> *tokenize(Buf *buf) {
begin_token(&t, TokenIdNumberLiteral);
break;
case '#':
- t.state = TokenizeStateDirective;
- begin_token(&t, TokenIdDirective);
+ begin_directive(&t);
+ break;
+ case '"':
+ begin_token(&t, TokenIdStringLiteral);
+ t.state = TokenizeStateString;
break;
case '(':
begin_token(&t, TokenIdLParen);
@@ -269,10 +331,6 @@ static ZigList<Token> *tokenize(Buf *buf) {
begin_token(&t, TokenIdRBrace);
end_token(&t);
break;
- case '"':
- begin_token(&t, TokenIdStringLiteral);
- t.state = TokenizeStateString;
- break;
case ';':
begin_token(&t, TokenIdSemicolon);
end_token(&t);
@@ -286,8 +344,70 @@ static ZigList<Token> *tokenize(Buf *buf) {
}
break;
case TokenizeStateDirective:
- if (c == '\n') {
- end_directive(&t);
+ switch (c) {
+ case '\n':
+ end_directive_name(&t);
+ end_directive(&t);
+ break;
+ case ' ':
+ case '\t':
+ case '\f':
+ case '\r':
+ case 0xb:
+ break;
+ case SYMBOL_CHAR:
+ t.state = TokenizeStateDirectiveName;
+ buf_append_char(t.directive_name, c);
+ break;
+ default:
+ tokenize_error(&t, "invalid directive character: '%c'", c);
+ break;
+ }
+ break;
+ case TokenizeStateDirectiveName:
+ switch (c) {
+ case WHITESPACE:
+ end_directive_name(&t);
+ break;
+ case SYMBOL_CHAR:
+ buf_append_char(t.directive_name, c);
+ break;
+ default:
+ tokenize_error(&t, "invalid directive name character: '%c'", c);
+ break;
+ }
+ break;
+ case TokenizeStateInclude:
+ switch (c) {
+ case WHITESPACE:
+ break;
+ case '<':
+ case '"':
+ t.state = TokenizeStateIncludeQuote;
+ t.quote_start_pos = t.pos;
+ t.unquote_char = (c == '<') ? '>' : '"';
+ break;
+ }
+ break;
+ case TokenizeStateIncludeQuote:
+ if (c == t.unquote_char) {
+ t.include_path = buf_slice(t.buf, t.quote_start_pos + 1, t.pos);
+ t.state = TokenizeStateDirectiveEnd;
+ }
+ break;
+ case TokenizeStateDirectiveEnd:
+ switch (c) {
+ case '\n':
+ end_directive(&t);
+ break;
+ case ' ':
+ case '\t':
+ case '\f':
+ case '\r':
+ case 0xb:
+ break;
+ default:
+ tokenize_error(&t, "expected whitespace or newline: '%c'", c);
}
break;
case TokenizeStateSymbol:
@@ -333,6 +453,13 @@ static ZigList<Token> *tokenize(Buf *buf) {
case TokenizeStateDirective:
end_directive(&t);
break;
+ case TokenizeStateDirectiveName:
+ end_directive_name(&t);
+ end_directive(&t);
+ break;
+ case TokenizeStateInclude:
+ tokenize_error(&t, "missing include path");
+ break;
case TokenizeStateSymbol:
end_symbol(&t);
break;
@@ -342,6 +469,12 @@ static ZigList<Token> *tokenize(Buf *buf) {
case TokenizeStateNumber:
end_symbol(&t);
break;
+ case TokenizeStateIncludeQuote:
+ tokenize_error(&t, "unterminated include path");
+ break;
+ case TokenizeStateDirectiveEnd:
+ end_directive(&t);
+ break;
}
assert(!t.cur_tok);
return t.tokens;
@@ -374,144 +507,6 @@ static void print_tokens(Buf *buf, ZigList<Token> *tokens) {
}
}
-struct Preprocess {
- Buf *out_buf;
- Buf *in_buf;
- Token *token;
- ZigList<char *> *include_paths;
- Buf *cur_dir_path;
-};
-
-__attribute__ ((format (printf, 2, 3)))
-static void preprocess_error(Preprocess *p, const char *format, ...) {
- va_list ap;
- va_start(ap, format);
- fprintf(stderr, "Error: Line %d, column %d: ", p->token->start_line + 1, p->token->start_column + 1);
- vfprintf(stderr, format, ap);
- fprintf(stderr, "\n");
- va_end(ap);
- exit(EXIT_FAILURE);
-}
-
-enum IncludeState {
- IncludeStateStart,
- IncludeStateQuote,
-};
-
-static Buf *find_include_file(Preprocess *p, char *dir_path, char *file_path) {
- Buf *full_path = buf_sprintf("%s/%s", dir_path, file_path);
-
- FILE *f = fopen(buf_ptr(full_path), "rb");
- if (!f)
- return nullptr;
-
- return fetch_file(f);
-}
-
-static void render_include(Preprocess *p, Buf *target_path, char unquote_char) {
- if (unquote_char == '"') {
- Buf *file_contents = find_include_file(p, buf_ptr(p->cur_dir_path), buf_ptr(target_path));
- if (file_contents) {
- buf_append_buf(p->out_buf, file_contents);
- return;
- }
- }
- for (int i = 0; i < p->include_paths->length; i += 1) {
- char *include_path = p->include_paths->at(i);
- Buf *file_contents = find_include_file(p, include_path, buf_ptr(target_path));
- if (file_contents) {
- buf_append_buf(p->out_buf, file_contents);
- return;
- }
- }
- preprocess_error(p, "include path \"%s\" not found", buf_ptr(target_path));
-}
-
-static void parse_and_render_include(Preprocess *p, Buf *directive_buf, int pos) {
- int state = IncludeStateStart;
- char unquote_char;
- int quote_start_pos;
- for (; pos < buf_len(directive_buf); pos += 1) {
- uint8_t c = buf_ptr(directive_buf)[pos];
- switch (state) {
- case IncludeStateStart:
- switch (c) {
- case WHITESPACE:
- break;
- case '<':
- case '"':
- state = IncludeStateQuote;
- quote_start_pos = pos;
- unquote_char = (c == '<') ? '>' : '"';
- break;
-
- }
- break;
- case IncludeStateQuote:
- if (c == unquote_char) {
- Buf *include_path = buf_slice(directive_buf, quote_start_pos + 1, pos);
- render_include(p, include_path, unquote_char);
- return;
- }
- break;
- }
- }
- preprocess_error(p, "include directive missing path");
-}
-
-static void render_directive(Preprocess *p, Buf *directive_buf) {
- for (int pos = 1; pos < buf_len(directive_buf); pos += 1) {
- uint8_t c = buf_ptr(directive_buf)[pos];
- switch (c) {
- case SYMBOL_CHAR:
- break;
- default:
- pos -= 1;
- Buf *directive_name = buf_from_mem(buf_ptr(directive_buf) + 1, pos);
- if (strcmp(buf_ptr(directive_name), "include") == 0) {
- parse_and_render_include(p, directive_buf, pos);
- } else {
- preprocess_error(p, "invalid directive: \"%s\"", buf_ptr(directive_name));
- }
- return;
- }
- }
-}
-
-static void render_token(Preprocess *p) {
- Buf *token_buf = buf_slice(p->in_buf, p->token->start_pos, p->token->end_pos);
- switch (p->token->id) {
- case TokenIdDirective:
- render_directive(p, token_buf);
- break;
- default:
- buf_append_buf(p->out_buf, token_buf);
- if (p->token->id == TokenIdSemicolon ||
- p->token->id == TokenIdLBrace ||
- p->token->id == TokenIdRBrace)
- {
- buf_append_str(p->out_buf, "\n", -1);
- } else {
- buf_append_str(p->out_buf, " ", -1);
- }
- }
-}
-
-static Buf *preprocess(Buf *in_buf, ZigList<Token> *tokens,
- ZigList<char *> *include_paths, Buf *cur_dir_path)
-{
- Preprocess p = {0};
- p.out_buf = buf_alloc();
- p.in_buf = in_buf;
- p.include_paths = include_paths;
- p.cur_dir_path = cur_dir_path;
- for (int i = 0; i < tokens->length; i += 1) {
- p.token = &tokens->at(i);
- render_token(&p);
- }
- return p.out_buf;
-}
-
char cur_dir[1024];
int main(int argc, char **argv) {
@@ -566,14 +561,16 @@ int main(int argc, char **argv) {
fprintf(stderr, "Original source:\n%s\n", buf_ptr(in_data));
- ZigList<Token> *tokens = tokenize(in_data);
+ ZigList<Token> *tokens = tokenize(in_data, &include_paths, cur_dir_path);
fprintf(stderr, "\nTokens:\n");
print_tokens(in_data, tokens);
+ /*
Buf *preprocessed_source = preprocess(in_data, tokens, &include_paths, cur_dir_path);
fprintf(stderr, "\nPreprocessed source:\n%s\n", buf_ptr(preprocessed_source));
+ */
return EXIT_SUCCESS;
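
The net effect in main.cpp is that #include handling moves out of the deleted preprocess() pass and into the tokenizer itself: end_directive() now calls render_include(), which locates the file and uses buf_splice_buf() to insert its contents into the source buffer at the current scan position, so tokenization simply continues over the spliced-in text. A hypothetical driver under the new tokenize() signature might look like this (the file name and include directory are placeholders, not from the commit):

    // Hypothetical driver (not part of the commit), written as if it lived in
    // main.cpp next to main(); fetch_file() and ZigList are used as elsewhere there.
    static void tokenize_one_file(const char *path, ZigList<char *> *include_paths) {
        FILE *f = fopen(path, "rb");
        if (!f)
            zig_panic("unable to open input file");
        Buf *in_data = fetch_file(f);

        Buf *cur_dir_path = buf_alloc();
        buf_append_str(cur_dir_path, ".");   // placeholder: directory of `path`, used for "..." includes

        // #include directives are resolved during tokenization now:
        // render_include() splices the included file's bytes into in_data
        // via buf_splice_buf(), and the scan loop keeps going over them.
        ZigList<Token> *tokens = tokenize(in_data, include_paths, cur_dir_path);
        print_tokens(in_data, tokens);
    }
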