Commit 1b24f4c73c
Changed files (7)
src/grammar.txt
@@ -1,57 +0,0 @@
-Root<node> : many(FnDecl) token(EOF) {
- $$ = ast_create_root($1);
-};
-
-FnDecl<node> : token(Fn) token(Symbol) token(LParen) list(ParamDecl, token(Comma)) token(RParen) token(Arrow) Type Block {
- $$ = ast_create_fn_decl($2, $4, $7, $8);
-} | token(Fn) token(Symbol) token(LParen) list(ParamDecl, token(Comma)) token(RParen) Block {
- $$ = ast_create_void_fn_decl($2, $4, $6);
-};
-
-ParamDecl<node> : token(Symbol) token(Colon) Type {
- $$ = ast_create_param_decl($1, $2);
-};
-
-Type<node> : token(Symbol) {
- $$ = ast_create_symbol_type($1);
-} | PointerType {
- $$ = $1;
-};
-
-PointerType<node> : token(Star) token(Const) Type {
- $$ = ast_create_pointer_type($2, $3);
-} | token(Star) token(Mut) Type {
- $$ = ast_create_pointer_type($2, $3);
-};
-
-Block<node> : token(LBrace) many(Statement) Expression token(RBrace) {
- $$ = ast_create_expr_block($2, $3);
-} | token(LBrace) many(Statement) token(RBrace) {
- $$ = ast_create_block($2);
-};
-
-Statement<node> : ExpressionStatement {
- $$ = $1;
-} | ReturnStatement {
- $$ = $1;
-};
-
-ExpressionStatement<node> : Expression token(Semicolon) {
- $$ = ast_create_expression_statement($1);
-};
-
-ReturnStatement<node> : token(Return) Expression token(Semicolon) {
- $$ = ast_create_return_statement($2);
-};
-
-Expression<node> : token(Number) {
- $$ = ast_create_number($1);
-} | token(String) {
- $$ = ast_create_string($1);
-} | FnCall {
- $$ = $1;
-};
-
-FnCall<node> : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) {
- $$ = ast_create_fn_call($1, $3);
-};
src/main.cpp
@@ -89,6 +89,8 @@ static int build(const char *arg0, const char *in_file, const char *out_file, Zi
AstNode *root = ast_parse(in_data, tokens);
assert(root);
+ fprintf(stderr, "\nAST:\n");
+ fprintf(stderr, "------\n");
ast_print(root, 0);
@@ -135,7 +137,7 @@ int main(int argc, char **argv) {
} else {
switch (cmd) {
case CmdNone:
- zig_panic("unreachable");
+ zig_unreachable();
case CmdBuild:
if (!in_file) {
in_file = arg;
@@ -154,6 +156,6 @@ int main(int argc, char **argv) {
return build(arg0, in_file, out_file, &include_paths);
}
- zig_panic("unreachable");
+ zig_unreachable();
}
src/parser.cpp
@@ -41,7 +41,7 @@ const char *node_type_str(NodeType node_type) {
case NodeTypeFnCall:
return "FnCall";
}
- zig_panic("unreachable");
+ zig_unreachable();
}
void ast_print(AstNode *node, int indent) {
@@ -82,28 +82,321 @@ void ast_print(AstNode *node, int indent) {
struct ParseContext {
Buf *buf;
AstNode *root;
+ ZigList<Token> *tokens;
};
-AstNode *ast_create_root(void) {
- zig_panic("TODO create root");
+static AstNode *ast_create_node(NodeType type) {
+ AstNode *node = allocate<AstNode>(1);
+ node->type = type;
+ return node;
}
-void ast_invalid_token_error(Buf *buf, Token *token) {
+static void ast_buf_from_token(ParseContext *pc, Token *token, Buf *buf) {
+ buf_init_from_mem(buf, buf_ptr(pc->buf) + token->start_pos, token->end_pos - token->start_pos);
+}
+
+static void ast_invalid_token_error(ParseContext *pc, Token *token) {
Buf token_value = {0};
- buf_init_from_mem(&token_value, buf_ptr(buf) + token->start_pos, token->end_pos - token->start_pos);
+ ast_buf_from_token(pc, token, &token_value);
ast_error(token, "invalid token: '%s'", buf_ptr(&token_value));
}
-void ast_parse_fn_decls(ParseContext *pc, ZigList<AstNode *> *fn_decls) {
- zig_panic("TODO parse fn decls");
+static AstNode *ast_parse_expression(ParseContext *pc, int token_index, int *new_token_index);
+
+
+static void ast_expect_token(ParseContext *pc, Token *token, TokenId token_id) {
+ if (token->id != token_id) {
+ ast_invalid_token_error(pc, token);
+ }
+}
+
+/*
+Type : token(Symbol) | PointerType;
+PointerType : token(Star) token(Const) Type | token(Star) token(Mut) Type;
+*/
+static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token_index) {
+ AstNode *node = ast_create_node(NodeTypeType);
+
+ Token *token = &pc->tokens->at(token_index);
+ token_index += 1;
+
+ if (token->id == TokenIdSymbol) {
+ node->data.type.type = AstNodeTypeTypePrimitive;
+ ast_buf_from_token(pc, token, &node->data.type.primitive_name);
+ } else if (token->id == TokenIdStar) {
+ Token *const_or_mut = &pc->tokens->at(token_index);
+ token_index += 1;
+ if (const_or_mut->id == TokenIdKeywordMut) {
+ node->data.type.is_const = false;
+ } else if (const_or_mut->id == TokenIdKeywordConst) {
+ node->data.type.is_const = true;
+ } else {
+ ast_invalid_token_error(pc, const_or_mut);
+ }
+
+ node->data.type.child_type = ast_parse_type(pc, token_index, &token_index);
+ } else {
+ ast_invalid_token_error(pc, token);
+ }
+
+ *new_token_index = token_index;
+ return node;
+}
+
+/*
+ParamDecl<node> : token(Symbol) token(Colon) Type {
+};
+*/
+static AstNode *ast_parse_param_decl(ParseContext *pc, int token_index, int *new_token_index) {
+ AstNode *node = ast_create_node(NodeTypeParamDecl);
+
+ Token *param_name = &pc->tokens->at(token_index);
+ token_index += 1;
+ ast_expect_token(pc, param_name, TokenIdSymbol);
+
+ ast_buf_from_token(pc, param_name, &node->data.param_decl.name);
+
+ Token *colon = &pc->tokens->at(token_index);
+ token_index += 1;
+ ast_expect_token(pc, colon, TokenIdColon);
+
+ node->data.param_decl.type = ast_parse_type(pc, token_index, &token_index);
+
+ *new_token_index = token_index;
+ return node;
+}
+
+
+static void ast_parse_param_decl_list(ParseContext *pc, int token_index, int *new_token_index,
+ ZigList<AstNode *> *params)
+{
+ Token *l_paren = &pc->tokens->at(token_index);
+ token_index += 1;
+ ast_expect_token(pc, l_paren, TokenIdLParen);
+
+ Token *token = &pc->tokens->at(token_index);
+ if (token->id == TokenIdRParen) {
+ token_index += 1;
+ *new_token_index = token_index;
+ return;
+ }
+
+ for (;;) {
+ AstNode *param_decl_node = ast_parse_param_decl(pc, token_index, &token_index);
+ params->append(param_decl_node);
+
+ Token *token = &pc->tokens->at(token_index);
+ token_index += 1;
+ if (token->id == TokenIdRParen) {
+ *new_token_index = token_index;
+ return;
+ } else {
+ ast_expect_token(pc, token, TokenIdComma);
+ }
+ }
+ zig_unreachable();
+}
+
+static void ast_parse_fn_call_param_list(ParseContext *pc, int token_index, int *new_token_index,
+ ZigList<AstNode*> *params)
+{
+ Token *l_paren = &pc->tokens->at(token_index);
+ token_index += 1;
+ ast_expect_token(pc, l_paren, TokenIdLParen);
+
+ Token *token = &pc->tokens->at(token_index);
+ if (token->id == TokenIdRParen) {
+ token_index += 1;
+ *new_token_index = token_index;
+ return;
+ }
+
+ for (;;) {
+ AstNode *expr = ast_parse_expression(pc, token_index, &token_index);
+ params->append(expr);
+
+ Token *token = &pc->tokens->at(token_index);
+ token_index += 1;
+ if (token->id == TokenIdRParen) {
+ *new_token_index = token_index;
+ return;
+ } else {
+ ast_expect_token(pc, token, TokenIdComma);
+ }
+ }
+ zig_unreachable();
+}
+
+/*
+FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ;
+*/
+static AstNode *ast_parse_fn_call(ParseContext *pc, int token_index, int *new_token_index) {
+ AstNode *node = ast_create_node(NodeTypeFnCall);
+
+ Token *fn_name = &pc->tokens->at(token_index);
+ token_index += 1;
+ ast_expect_token(pc, fn_name, TokenIdSymbol);
+
+ ast_buf_from_token(pc, fn_name, &node->data.fn_call.name);
+
+ ast_parse_fn_call_param_list(pc, token_index, &token_index, &node->data.fn_call.params);
+
+ *new_token_index = token_index;
+ return node;
+}
+
+static AstNode *ast_parse_expression(ParseContext *pc, int token_index, int *new_token_index) {
+ AstNode *node = ast_create_node(NodeTypeExpression);
+
+ Token *token = &pc->tokens->at(token_index);
+ if (token->id == TokenIdSymbol) {
+ node->data.expression.type = AstNodeExpressionTypeFnCall;
+ node->data.expression.data.fn_call = ast_parse_fn_call(pc, token_index, &token_index);
+ } else if (token->id == TokenIdNumberLiteral) {
+ node->data.expression.type = AstNodeExpressionTypeNumber;
+ ast_buf_from_token(pc, token, &node->data.expression.data.number);
+ token_index += 1;
+ } else if (token->id == TokenIdStringLiteral) {
+ node->data.expression.type = AstNodeExpressionTypeString;
+ ast_buf_from_token(pc, token, &node->data.expression.data.string);
+ token_index += 1;
+ } else {
+ ast_invalid_token_error(pc, token);
+ }
+
+ *new_token_index = token_index;
+ return node;
+}
+
+/*
+Statement : ExpressionStatement | ReturnStatement ;
+
+ExpressionStatement : Expression token(Semicolon) ;
+
+ReturnStatement : token(Return) Expression token(Semicolon) ;
+
+Expression : token(Number) | token(String) | FnCall ;
+
+FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ;
+*/
+static AstNode *ast_parse_statement(ParseContext *pc, int token_index, int *new_token_index) {
+ AstNode *node = ast_create_node(NodeTypeStatement);
+
+ Token *token = &pc->tokens->at(token_index);
+ if (token->id == TokenIdKeywordReturn) {
+ token_index += 1;
+ node->data.statement.type = AstNodeStatementTypeReturn;
+ node->data.statement.data.retrn.expression = ast_parse_expression(pc, token_index, &token_index);
+
+ Token *semicolon = &pc->tokens->at(token_index);
+ token_index += 1;
+ ast_expect_token(pc, semicolon, TokenIdSemicolon);
+ } else if (token->id == TokenIdSymbol ||
+ token->id == TokenIdStringLiteral ||
+ token->id == TokenIdNumberLiteral)
+ {
+ node->data.statement.type = AstNodeStatementTypeExpression;
+ node->data.statement.data.expr.expression = ast_parse_expression(pc, token_index, &token_index);
+
+ Token *semicolon = &pc->tokens->at(token_index);
+ token_index += 1;
+ ast_expect_token(pc, semicolon, TokenIdSemicolon);
+ } else {
+ ast_invalid_token_error(pc, token);
+ }
+
+ *new_token_index = token_index;
+ return node;
+}
+
+/*
+Block : token(LBrace) many(Statement) token(RBrace);
+*/
+static AstNode *ast_parse_block(ParseContext *pc, int token_index, int *new_token_index) {
+ AstNode *node = ast_create_node(NodeTypeBlock);
+
+ Token *l_brace = &pc->tokens->at(token_index);
+ token_index += 1;
+ ast_expect_token(pc, l_brace, TokenIdLBrace);
+
+ for (;;) {
+ Token *token = &pc->tokens->at(token_index);
+ if (token->id == TokenIdRBrace) {
+ token_index += 1;
+ *new_token_index = token_index;
+ return node;
+ } else {
+ AstNode *statement_node = ast_parse_statement(pc, token_index, &token_index);
+ node->data.block.statements.append(statement_node);
+ }
+ }
+ zig_unreachable();
+}
+
+/*
+FnDecl : token(Fn) token(Symbol) ParamDeclList option(token(Arrow) Type) Block;
+*/
+static AstNode *ast_parse_fn_decl(ParseContext *pc, int token_index, int *new_token_index) {
+ AstNode *node = ast_create_node(NodeTypeFnDecl);
+
+ Token *fn_token = &pc->tokens->at(token_index);
+ token_index += 1;
+ ast_expect_token(pc, fn_token, TokenIdKeywordFn);
+
+ Token *fn_name = &pc->tokens->at(token_index);
+ token_index += 1;
+ ast_expect_token(pc, fn_name, TokenIdSymbol);
+
+ ast_buf_from_token(pc, fn_name, &node->data.fn_decl.name);
+
+
+ ast_parse_param_decl_list(pc, token_index, &token_index, &node->data.fn_decl.params);
+
+ Token *arrow = &pc->tokens->at(token_index);
+ token_index += 1;
+ if (arrow->id == TokenIdArrow) {
+ node->data.fn_decl.return_type = ast_parse_type(pc, token_index, &token_index);
+ } else if (arrow->id == TokenIdLBrace) {
+ node->data.fn_decl.return_type = nullptr;
+ } else {
+ ast_invalid_token_error(pc, arrow);
+ }
+
+ node->data.fn_decl.body = ast_parse_block(pc, token_index, &token_index);
+
+ *new_token_index = token_index;
+ return node;
+}
+
+
+static void ast_parse_fn_decl_list(ParseContext *pc, int token_index, ZigList<AstNode *> *fn_decls,
+ int *new_token_index)
+{
+ for (;;) {
+ Token *token = &pc->tokens->at(token_index);
+ if (token->id == TokenIdKeywordFn) {
+ AstNode *fn_decl_node = ast_parse_fn_decl(pc, token_index, &token_index);
+ fn_decls->append(fn_decl_node);
+ } else {
+ *new_token_index = token_index;
+ return;
+ }
+ }
+ zig_unreachable();
}
AstNode *ast_parse(Buf *buf, ZigList<Token> *tokens) {
ParseContext pc = {0};
pc.buf = buf;
- pc.root = ast_create_root();
+ pc.root = ast_create_node(NodeTypeRoot);
+ pc.tokens = tokens;
- ast_parse_fn_decls(&pc, &pc.root->data.root.fn_decls);
+ int new_token_index;
+ ast_parse_fn_decl_list(&pc, 0, &pc.root->data.root.fn_decls, &new_token_index);
+
+ if (new_token_index != tokens->length - 1) {
+ ast_invalid_token_error(&pc, &tokens->at(new_token_index));
+ }
return pc.root;
}
src/parser.hpp
@@ -44,20 +44,49 @@ enum AstNodeTypeType {
struct AstNodeType {
AstNodeTypeType type;
- AstNode *child;
+ Buf primitive_name;
+ AstNode *child_type;
+ bool is_const;
};
-struct AstNodePointerType {
- AstNode *const_or_mut;
- AstNode *type;
+struct AstNodeBlock {
+ ZigList<AstNode *> statements;
};
-struct AstNodeBlock {
- ZigList<AstNode *> expressions;
+enum AstNodeStatementType {
+ AstNodeStatementTypeExpression,
+ AstNodeStatementTypeReturn,
+};
+
+struct AstNodeStatementExpression {
+ AstNode *expression;
+};
+
+struct AstNodeStatementReturn {
+ AstNode *expression;
+};
+
+struct AstNodeStatement {
+ AstNodeStatementType type;
+ union {
+ AstNodeStatementExpression expr;
+ AstNodeStatementReturn retrn;
+ } data;
+};
+
+enum AstNodeExpressionType {
+ AstNodeExpressionTypeNumber,
+ AstNodeExpressionTypeString,
+ AstNodeExpressionTypeFnCall,
};
struct AstNodeExpression {
- AstNode *child;
+ AstNodeExpressionType type;
+ union {
+ Buf number;
+ Buf string;
+ AstNode *fn_call;
+ } data;
};
struct AstNodeFnCall {
@@ -74,13 +103,14 @@ struct AstNode {
AstNodeType type;
AstNodeParamDecl param_decl;
AstNodeBlock block;
+ AstNodeStatement statement;
AstNodeExpression expression;
AstNodeFnCall fn_call;
} data;
};
__attribute__ ((format (printf, 2, 3)))
-void ast_error(Token *token, const char *format, ...);
+void ast_token_error(Token *token, const char *format, ...);
void ast_invalid_token_error(Buf *buf, Token *token);
@@ -91,6 +121,4 @@ const char *node_type_str(NodeType node_type);
void ast_print(AstNode *node, int indent);
-AstNode *ast_create_root(void);
-
#endif
src/tokenizer.hpp
@@ -10,7 +10,6 @@
#include "buffer.hpp"
-/*
enum TokenId {
TokenIdEof,
TokenIdSymbol,
@@ -32,30 +31,6 @@ enum TokenId {
TokenIdArrow,
TokenIdDash,
};
-*/
-
-// TODO: debug delete this
-enum TokenId {
- TokenIdLParen = 0,
- TokenIdRParen = 1,
- TokenIdEof = 2,
- TokenIdStar = 3,
- TokenIdPlus = 4,
- TokenIdSymbol,
- TokenIdKeywordFn,
- TokenIdKeywordReturn,
- TokenIdKeywordMut,
- TokenIdKeywordConst,
- TokenIdComma,
- TokenIdLBrace,
- TokenIdRBrace,
- TokenIdStringLiteral,
- TokenIdSemicolon,
- TokenIdNumberLiteral,
- TokenIdColon,
- TokenIdArrow,
- TokenIdDash,
-};
struct Token {
TokenId id;
src/util.hpp
@@ -20,6 +20,12 @@ void zig_panic(const char *format, ...)
__attribute__ ((noreturn))
__attribute__ ((format (printf, 1, 2)));
+__attribute__((cold))
+__attribute__ ((noreturn))
+static inline void zig_unreachable(void) {
+ zig_panic("unreachable");
+}
+
template<typename T>
__attribute__((malloc)) static inline T *allocate_nonzero(size_t count) {
T *ptr = reinterpret_cast<T*>(malloc(count * sizeof(T)));
README.md
@@ -69,3 +69,31 @@ zig | C equivalent | Description
f128 | long double | 128-bit IEE754 floating point
isize | ssize_t | signed pointer sized integer
usize | size_t | unsigned pointer sized integer
+
+### Grammar
+
+```
+Root : many(FnDecl) token(EOF);
+
+FnDecl : token(Fn) token(Symbol) ParamDeclList option(token(Arrow) Type) Block;
+
+ParamDeclList : token(LParen) list(ParamDecl, token(Comma)) token(RParen);
+
+ParamDecl : token(Symbol) token(Colon) Type;
+
+Type : token(Symbol) | PointerType;
+
+PointerType : token(Star) token(Const) Type | token(Star) token(Mut) Type;
+
+Block : token(LBrace) many(Statement) token(RBrace);
+
+Statement : ExpressionStatement | ReturnStatement ;
+
+ExpressionStatement : Expression token(Semicolon) ;
+
+ReturnStatement : token(Return) Expression token(Semicolon) ;
+
+Expression : token(Number) | token(String) | FnCall ;
+
+FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ;
+```