Commit 72be61fc0a
Changed files (7)
src/buffer.cpp
@@ -36,7 +36,7 @@ void buf_appendf(Buf *buf, const char *format, ...) {
int orig_len = buf_len(buf);
- buf_resize(buf, orig_len + required_size);
+ buf_resize(buf, orig_len + len1);
int len2 = vsnprintf(buf_ptr(buf) + orig_len, required_size, format, ap2);
assert(len2 == len1);
src/buffer.hpp
@@ -55,6 +55,14 @@ static inline void buf_init_from_mem(Buf *buf, const char *ptr, int len) {
buf->list.at(buf_len(buf)) = 0;
}
+static inline void buf_init_from_str(Buf *buf, const char *str) {
+ buf_init_from_mem(buf, str, strlen(str));
+}
+
+static inline void buf_init_from_buf(Buf *buf, Buf *other) {
+ buf_init_from_mem(buf, buf_ptr(other), buf_len(other));
+}
+
static inline Buf *buf_create_from_mem(const char *ptr, int len) {
Buf *buf = allocate<Buf>(1);
buf_init_from_mem(buf, ptr, len);
src/main.cpp
@@ -28,7 +28,6 @@ static int usage(const char *arg0) {
fprintf(stderr, "Usage: %s [command] [options] target\n"
"Commands:\n"
" build create an executable from target\n"
- " link turn a .o file into an executable\n"
"Options:\n"
" --output output file\n"
" --version print version number and exit\n"
src/parser.cpp
@@ -78,3 +78,7 @@ void ast_print(AstNode *node, int indent) {
break;
}
}
+
+AstNode *ast_create_root(Token *token) {
+ return nullptr;
+}
src/parser.hpp
@@ -89,4 +89,6 @@ const char *node_type_str(NodeType node_type);
void ast_print(AstNode *node, int indent);
+AstNode *ast_create_root(Token *token);
+
#endif
src/parsergen.cpp
@@ -187,29 +187,39 @@ struct RuleNode {
};
-enum ParserStateType {
- ParserStateTypeError,
- ParserStateTypeOk,
- ParserStateTypeCapture,
+enum CodeGenType {
+ CodeGenTypeTransition,
+ CodeGenTypeError,
+ CodeGenTypeSave,
+ CodeGenTypePushNode,
+ CodeGenTypeCapture,
+ CodeGenTypePopNode,
+ CodeGenTypeEatToken,
};
-struct ParserStateError {
+struct CodeGenError {
Buf *msg;
};
-struct ParserStateCapture {
+struct CodeGenCapture {
Buf *body;
+ bool is_root;
+ Buf *field_names;
+};
+
+struct CodeGen {
+ CodeGenType type;
+ union {
+ CodeGenError error;
+ CodeGenCapture capture;
+ };
};
struct ParserState {
- ParserStateType type;
+ ZigList<CodeGen *> code_gen_list;
// One for each token ID.
ParserState **transition;
int index;
- union {
- ParserStateError error;
- ParserStateCapture capture;
- };
};
enum LexState {
@@ -234,6 +244,7 @@ struct Gen {
ZigList<ParserState *> transition_table;
ZigList<Token *> tokens;
RuleNode *root;
+ int biggest_tuple_len;
Buf *in_buf;
LexState lex_state;
@@ -249,9 +260,8 @@ struct Gen {
int lex_body_end;
};
-static ParserState *create_state(Gen *g, ParserStateType type) {
+static ParserState *create_state(Gen *g) {
ParserState *state = allocate<ParserState>(1);
- state->type = type;
state->index = g->transition_table.length;
state->transition = allocate<ParserState*>(g->tokens.length);
g->transition_table.append(state);
@@ -264,28 +274,93 @@ static void fill_state_with_transition(Gen *g, ParserState *source, ParserState
}
}
-static void gen(Gen *g, RuleNode *node) {
+static void state_add_code(ParserState *state, CodeGen *code) {
+ state->code_gen_list.append(code);
+}
+
+static void state_add_save_token(ParserState *state) {
+ CodeGen *code = allocate<CodeGen>(1);
+ code->type = CodeGenTypeSave;
+ state_add_code(state, code);
+}
+
+static void state_add_error(ParserState *state, Buf *msg) {
+ CodeGen *code = allocate<CodeGen>(1);
+ code->type = CodeGenTypeError;
+ code->error.msg = msg;
+ state_add_code(state, code);
+}
+
+static void state_add_transition(ParserState *state) {
+ CodeGen *code = allocate<CodeGen>(1);
+ code->type = CodeGenTypeTransition;
+ state_add_code(state, code);
+}
+
+static void state_add_push_node(ParserState *state) {
+ CodeGen *code = allocate<CodeGen>(1);
+ code->type = CodeGenTypePushNode;
+ state_add_code(state, code);
+}
+
+static CodeGen *codegen_create_capture(Buf *body, bool is_root, int field_name_count) {
+ CodeGen *code = allocate<CodeGen>(1);
+ code->type = CodeGenTypeCapture;
+ code->capture.body = body;
+ code->capture.is_root = is_root;
+ code->capture.field_names = allocate<Buf>(field_name_count);
+ return code;
+}
+
+static void state_add_pop_node(ParserState *state) {
+ CodeGen *code = allocate<CodeGen>(1);
+ code->type = CodeGenTypePopNode;
+ state_add_code(state, code);
+}
+
+static void state_add_eat_token(ParserState *state) {
+ CodeGen *code = allocate<CodeGen>(1);
+ code->type = CodeGenTypeEatToken;
+ state_add_code(state, code);
+}
+
+static void gen(Gen *g, RuleNode *node, Buf *out_field_name) {
switch (node->type) {
case RuleNodeTypeToken:
{
- ParserState *ok_state = create_state(g, ParserStateTypeOk);
- ParserState *err_state = create_state(g, ParserStateTypeError);
+ buf_init_from_str(out_field_name, "token");
+
+ state_add_save_token(g->cur_state);
- err_state->error.msg = buf_sprintf("expected token '%s'", buf_ptr(&node->token.token->name));
+ ParserState *ok_state = create_state(g);
+ ParserState *err_state = create_state(g);
+ state_add_error(err_state, buf_sprintf("expected token '%s'", buf_ptr(&node->token.token->name)));
fill_state_with_transition(g, g->cur_state, err_state);
g->cur_state->transition[node->token.token->id] = ok_state;
+ state_add_transition(g->cur_state);
+ state_add_eat_token(g->cur_state);
+
g->cur_state = ok_state;
}
break;
case RuleNodeTypeTuple:
{
+ buf_init_from_str(out_field_name, "node");
+
+ state_add_push_node(g->cur_state);
+
+ bool is_root = (node == g->root);
+ int field_name_count = node->tuple.children.length;
+ CodeGen *code = codegen_create_capture(&node->tuple.body, is_root, field_name_count);
+
for (int i = 0; i < node->tuple.children.length; i += 1) {
RuleNode *child = node->tuple.children.at(i);
- gen(g, child);
+ gen(g, child, &code->capture.field_names[i]);
}
- g->cur_state->type = ParserStateTypeCapture;
- g->cur_state->capture.body = &node->tuple.body;
+ state_add_code(g->cur_state, code);
+
+ state_add_pop_node(g->cur_state);
}
break;
case RuleNodeTypeMany:
@@ -301,7 +376,10 @@ static void gen(Gen *g, RuleNode *node) {
zig_panic("TODO");
break;
case RuleNodeTypeSubRule:
- zig_panic("TODO");
+ {
+ RuleNode *child = node->sub_rule.child;
+ gen(g, child, out_field_name);
+ }
break;
}
}
@@ -407,6 +485,9 @@ static void end_token_name(Gen *g) {
assert(g->lex_cur_rule->type == RuleNodeTypeTuple);
g->lex_cur_rule->tuple.children.append(node);
+ g->biggest_tuple_len = max(g->biggest_tuple_len, g->lex_cur_rule->tuple.children.length);
+
+
lex_pop_stack(g);
}
@@ -571,6 +652,76 @@ static void initialize_rules(Gen *g) {
}
}
+enum TemplateState {
+ TemplateStateStart,
+ TemplateStateDollar,
+ TemplateStateNumber,
+};
+
+static Buf *fill_template(Buf *body, const char *result_name, Buf *field_names) {
+ //fprintf(stderr, "fill template input:\n%s\n", buf_ptr(body));
+ Buf *result = buf_alloc();
+ TemplateState state = TemplateStateStart;
+ int digit_start;
+ for (int i = 0; i < buf_len(body); i += 1) {
+ uint8_t c = buf_ptr(body)[i];
+ switch (state) {
+ case TemplateStateStart:
+ switch (c) {
+ case '$':
+ state = TemplateStateDollar;
+ break;
+ default:
+ buf_append_char(result, c);
+ break;
+ }
+ break;
+ case TemplateStateDollar:
+ switch (c) {
+ case '$':
+ buf_append_str(result, result_name);
+ state = TemplateStateStart;
+ break;
+ case DIGIT:
+ digit_start = i;
+ state = TemplateStateNumber;
+ break;
+ default:
+ buf_append_char(result, '$');
+ buf_append_char(result, c);
+ state = TemplateStateStart;
+ break;
+ }
+ break;
+ case TemplateStateNumber:
+ switch (c) {
+ case DIGIT:
+ // nothing
+ break;
+ default:
+ {
+ Buf *num_buf = buf_create_from_mem(&buf_ptr(body)[digit_start], i - digit_start);
+ int index = atoi(buf_ptr(num_buf)) - 1;
+ buf_appendf(result, "(top_node->data[%d].%s)%c",
+ index, buf_ptr(&field_names[index]), c);
+
+ state = TemplateStateStart;
+ }
+ break;
+ }
+ break;
+ }
+ }
+ switch (state) {
+ case TemplateStateStart:
+ // OK
+ break;
+ default:
+ zig_panic("unable to fill grammar template");
+ }
+ //fprintf(stderr, "fill template output:\n%s\n", buf_ptr(result));
+ return result;
+}
int main(int argc, char **argv) {
const char *in_filename = argv[1];
@@ -603,8 +754,9 @@ int main(int argc, char **argv) {
g.root = g.rules.at(0);
- g.cur_state = create_state(&g, ParserStateTypeOk);
- gen(&g, g.root);
+ g.cur_state = create_state(&g);
+ Buf root_field_name = {0};
+ gen(&g, g.root, &root_field_name);
fprintf(out_f, "/* This file is generated by parsergen.cpp */\n");
fprintf(out_f, "\n");
@@ -627,15 +779,14 @@ int main(int argc, char **argv) {
}
fprintf(out_f, "\n");
- /* TODO
- fprintf(out_f, "struct ParserGenNode{\n");
+ fprintf(out_f, "struct ParserGenNode {\n");
+ fprintf(out_f, " int next_index;\n");
fprintf(out_f, " union {\n");
- fprintf(out_f, " [%d];\n", biggest_tuple_len);
fprintf(out_f, " Token *token;\n");
- fprintf(out_f, " };\n");
+ fprintf(out_f, " AstNode *node;\n");
+ fprintf(out_f, " } data[%d];\n", g.biggest_tuple_len);
fprintf(out_f, "};\n");
fprintf(out_f, "\n");
- */
fprintf(out_f, "AstNode * ast_parse(Buf *buf, ZigList<Token> *tokens) {\n");
@@ -655,39 +806,66 @@ int main(int argc, char **argv) {
fprintf(out_f, " int state = 0;\n");
+ fprintf(out_f, " int token_index = 0;\n");
+ fprintf(out_f, " Token *token = &tokens->at(token_index);\n");
fprintf(out_f, " AstNode *root = nullptr;\n");
+ fprintf(out_f, " ZigList<ParserGenNode *> stack = {0};\n");
+ fprintf(out_f, " ParserGenNode *top_node = nullptr;\n");
- fprintf(out_f, " for (int i = 0; i < tokens->length; i += 1) {\n");
- fprintf(out_f, " Token *token = &tokens->at(i);\n");
+ fprintf(out_f, " for (;;) {\n");
fprintf(out_f, " switch (state) {\n");
- for (int i = 0; i < g.transition_table.length; i += 1) {
- ParserState *state = g.transition_table.at(i);
- fprintf(out_f, " case %d:\n", i);
- switch (state->type) {
- case ParserStateTypeError:
- fprintf(out_f, " ast_error(token, \"%s\");\n", buf_ptr(state->error.msg));
- break;
- case ParserStateTypeOk:
- fprintf(out_f, " assert(transition[%d][token->id] >= 0);\n", state->index);
- fprintf(out_f, " assert(transition[%d][token->id] < %d);\n",
- state->index, g.transition_table.length);
- fprintf(out_f, " state = transition[%d][token->id];\n", state->index);
- break;
- case ParserStateTypeCapture:
- // TODO fprintf(out_f, " %s\n", buf_ptr(state->capture.body));
- fprintf(out_f, " state = transition[%d][token->id];\n", state->index);
- break;
+ for (int state_i = 0; state_i < g.transition_table.length; state_i += 1) {
+ ParserState *state = g.transition_table.at(state_i);
+ fprintf(out_f, " case %d: {\n", state_i);
+ for (int code_i = 0; code_i < state->code_gen_list.length; code_i += 1) {
+ CodeGen *code = state->code_gen_list.at(code_i);
+ switch (code->type) {
+ case CodeGenTypeTransition:
+ fprintf(out_f, " assert(transition[%d][token->id] >= 0);\n", state->index);
+ fprintf(out_f, " assert(transition[%d][token->id] < %d);\n",
+ state->index, g.transition_table.length);
+ fprintf(out_f, " state = transition[%d][token->id];\n", state->index);
+ break;
+ case CodeGenTypeError:
+ fprintf(out_f, " ast_error(token, \"%s\");\n", buf_ptr(code->error.msg));
+ break;
+ case CodeGenTypeSave:
+ fprintf(out_f, " top_node->data[top_node->next_index++].token = token;\n");
+ break;
+ case CodeGenTypePushNode:
+ fprintf(out_f, " top_node = allocate<ParserGenNode>(1);\n");
+ fprintf(out_f, " stack.append(top_node);\n");
+ break;
+ case CodeGenTypeCapture:
+ if (code->capture.is_root) {
+ Buf *code_text = fill_template(code->capture.body, "root", code->capture.field_names);
+ fprintf(out_f, "%s\n", buf_ptr(code_text));
+ fprintf(out_f, " return root;\n");
+ } else {
+ zig_panic("TODO capture non-root");
+ }
+ break;
+ case CodeGenTypePopNode:
+ fprintf(out_f, " stack.pop();\n");
+ fprintf(out_f, " top_node = stack.length ? stack.last() : nullptr;\n");
+ break;
+ case CodeGenTypeEatToken:
+ fprintf(out_f, " token_index += 1;\n");
+ fprintf(out_f, " token = (token_index < tokens->length) ? &tokens->at(token_index) : nullptr;\n");
+ break;
+ }
}
fprintf(out_f, " break;\n");
+ fprintf(out_f, " }\n");
}
fprintf(out_f, " default:\n");
fprintf(out_f, " zig_panic(\"unreachable\");\n");
fprintf(out_f, " }\n");
fprintf(out_f, " }\n");
- fprintf(out_f, " return root;\n");
+ fprintf(out_f, " zig_panic(\"unreachable\");\n");
fprintf(out_f, "}\n");
-
+ return 0;
}
README.md
@@ -26,6 +26,8 @@ readable, safe, optimal, and concise code to solve any computing problem.
* Resilient to parsing errors to make IDE integration work well.
* Source code is UTF-8.
* Shebang line OK so language can be used for "scripting" as well.
+ * Ability to mark functions as test and automatically run them in test mode.
+ * Memory zeroed by default, unless you initialize with "uninitialized".
## Roadmap