Commit 7cfceeca2d
Changed files (10)
src/buffer.hpp
@@ -49,19 +49,19 @@ static inline void buf_deinit(Buf *buf) {
buf->list.deinit();
}
-static inline void buf_init_from_mem(Buf *buf, char *ptr, int len) {
+static inline void buf_init_from_mem(Buf *buf, const char *ptr, int len) {
buf->list.resize(len + 1);
memcpy(buf_ptr(buf), ptr, len);
buf->list.at(buf_len(buf)) = 0;
}
-static inline Buf *buf_create_from_mem(char *ptr, int len) {
+static inline Buf *buf_create_from_mem(const char *ptr, int len) {
Buf *buf = allocate<Buf>(1);
buf_init_from_mem(buf, ptr, len);
return buf;
}
-static inline Buf *buf_create_from_str(char *str) {
+static inline Buf *buf_create_from_str(const char *str) {
return buf_create_from_mem(str, strlen(str));
}
@@ -138,7 +138,7 @@ static inline Buf *buf_dirname(Buf *buf) {
return buf_slice(buf, 0, i);
}
}
- zig_panic("TODO buf_dirname no slash");
+ return buf_create_from_mem("", 0);
}
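The `buf_dirname` change above swaps a panic for a graceful fallback: a path with no directory component now yields an empty buffer. A minimal sketch of the expected behavior, using the buffer helpers already in this header:

```
// Sketch only (not part of the commit): buf_dirname after this change.
Buf *with_dir = buf_dirname(buf_create_from_str("src/main.zig")); // -> "src"
Buf *no_dir   = buf_dirname(buf_create_from_str("main.zig"));     // -> ""
// Previously the second call hit zig_panic("TODO buf_dirname no slash").
```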
src/grammar.txt
@@ -0,0 +1,53 @@
+Root : many(FnDecl) token(EOF) {
+ $$ = ast_create_root($1);
+}
+
+FnDecl : token(Fn) token(Symbol) token(LParen) list(ParamDecl, token(Comma)) token(RParen) option(token(Arrow) Type) Block {
+ $$ = ast_create_fn_decl($2, $4, $6, $7);
+}
+
+ParamDecl : token(Symbol) token(Colon) Type {
+    $$ = ast_create_param_decl($1, $3);
+}
+
+Type : token(Symbol) {
+ $$ = ast_create_symbol_type($1);
+} | PointerType {
+ $$ = $1;
+}
+
+PointerType : token(Star) token(Const) Type {
+ $$ = ast_create_pointer_type($2, $3);
+} | token(Star) token(Mut) Type {
+ $$ = ast_create_pointer_type($2, $3);
+}
+
+Block : token(LBrace) many(Statement) option(Expression) token(RBrace) {
+ $$ = ast_create_block($2, $3);
+}
+
+Statement : ExpressionStatement {
+ $$ = $1;
+} | ReturnStatement {
+ $$ = $1;
+}
+
+ExpressionStatement : Expression token(Semicolon) {
+ $$ = ast_create_expression_statement($1);
+}
+
+ReturnStatement : token(Return) Expression token(Semicolon) {
+ $$ = ast_create_return_statement($2);
+}
+
+Expression : token(Number) {
+ $$ = ast_create_number($1);
+} | token(String) {
+ $$ = ast_create_string($1);
+} | FnCall {
+ $$ = $1;
+}
+
+FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) {
+ $$ = ast_create_fn_call($1, $3);
+}
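The grammar file uses a yacc-style notation: `token(X)` matches one token, `many(R)` matches zero or more of `R`, `option(R)` matches zero or one, and `list(R, Sep)` matches `R`s separated by `Sep`. Inside the `{ ... }` action, `$$` is the rule's result and `$N` is the N-th matched item, counting every symbol including punctuation (hence `ast_create_fn_decl($2, $4, $6, $7)` skips `fn`, the parentheses, and the arrow). As a hedged illustration, the `ParamDecl` rule corresponds to a hand-written parse step roughly like the one below; `ParseContext`, `expect_token`, `parse_type`, and `ast_create_param_decl` are placeholders standing in for code the parser generator is expected to emit, not functions that exist in this commit.

```
// Hypothetical hand-written equivalent of:
//   ParamDecl : token(Symbol) token(Colon) Type { $$ = ast_create_param_decl($1, $3); }
static AstNode *parse_param_decl(ParseContext *pc) {
    Token *name = expect_token(pc, TokenIdSymbol); // $1
    expect_token(pc, TokenIdColon);                // $2, matched and discarded
    AstNode *type = parse_type(pc);                // $3
    return ast_create_param_decl(name, type);      // $$
}
```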
src/main.cpp
@@ -9,6 +9,8 @@
#include "util.hpp"
#include "list.hpp"
#include "buffer.hpp"
+#include "parser.hpp"
+#include "tokenizer.hpp"
#include <stdio.h>
#include <string.h>
@@ -48,466 +50,9 @@ static Buf *fetch_file(FILE *f) {
return buf;
}
-static inline bool mem_eql_str(const char *mem, size_t mem_len, const char *str) {
- size_t str_len = strlen(str);
- if (str_len != mem_len)
- return false;
- return memcmp(mem, str, mem_len) == 0;
-}
-
-
-#define WHITESPACE \
- ' ': \
- case '\t': \
- case '\n': \
- case '\f': \
- case '\r': \
- case 0xb
-
-#define DIGIT \
- '0': \
- case '1': \
- case '2': \
- case '3': \
- case '4': \
- case '5': \
- case '6': \
- case '7': \
- case '8': \
- case '9'
-
-#define ALPHA \
- 'a': \
- case 'b': \
- case 'c': \
- case 'd': \
- case 'e': \
- case 'f': \
- case 'g': \
- case 'h': \
- case 'i': \
- case 'j': \
- case 'k': \
- case 'l': \
- case 'm': \
- case 'n': \
- case 'o': \
- case 'p': \
- case 'q': \
- case 'r': \
- case 's': \
- case 't': \
- case 'u': \
- case 'v': \
- case 'w': \
- case 'x': \
- case 'y': \
- case 'z': \
- case 'A': \
- case 'B': \
- case 'C': \
- case 'D': \
- case 'E': \
- case 'F': \
- case 'G': \
- case 'H': \
- case 'I': \
- case 'J': \
- case 'K': \
- case 'L': \
- case 'M': \
- case 'N': \
- case 'O': \
- case 'P': \
- case 'Q': \
- case 'R': \
- case 'S': \
- case 'T': \
- case 'U': \
- case 'V': \
- case 'W': \
- case 'X': \
- case 'Y': \
- case 'Z'
-
-#define SYMBOL_CHAR \
- ALPHA: \
- case DIGIT: \
- case '_'
-
-
-enum TokenId {
- TokenIdSymbol,
- TokenIdKeywordFn,
- TokenIdKeywordReturn,
- TokenIdKeywordMut,
- TokenIdKeywordConst,
- TokenIdLParen,
- TokenIdRParen,
- TokenIdComma,
- TokenIdStar,
- TokenIdLBrace,
- TokenIdRBrace,
- TokenIdStringLiteral,
- TokenIdSemicolon,
- TokenIdNumberLiteral,
- TokenIdPlus,
- TokenIdColon,
- TokenIdArrow,
- TokenIdDash,
-};
-
-struct Token {
- TokenId id;
- int start_pos;
- int end_pos;
- int start_line;
- int start_column;
-};
-
-enum TokenizeState {
- TokenizeStateStart,
- TokenizeStateSymbol,
- TokenizeStateNumber,
- TokenizeStateString,
- TokenizeStateSawDash,
-};
-
-struct Tokenize {
- Buf *buf;
- int pos;
- TokenizeState state;
- ZigList<Token> *tokens;
- int line;
- int column;
- Token *cur_tok;
- Buf *cur_dir_path;
- ZigList<char *> *include_paths;
-};
-
-__attribute__ ((format (printf, 2, 3)))
-static void tokenize_error(Tokenize *t, const char *format, ...) {
- int line;
- int column;
- if (t->cur_tok) {
- line = t->cur_tok->start_line + 1;
- column = t->cur_tok->start_column + 1;
- } else {
- line = t->line + 1;
- column = t->column + 1;
- }
-
- va_list ap;
- va_start(ap, format);
- fprintf(stderr, "Error: Line %d, column %d: ", line, column);
- vfprintf(stderr, format, ap);
- fprintf(stderr, "\n");
- va_end(ap);
- exit(EXIT_FAILURE);
-}
-
-static void begin_token(Tokenize *t, TokenId id) {
- assert(!t->cur_tok);
- t->tokens->add_one();
- Token *token = &t->tokens->last();
- token->start_line = t->line;
- token->start_column = t->column;
- token->id = id;
- token->start_pos = t->pos;
- t->cur_tok = token;
-}
-
-static void end_token(Tokenize *t) {
- assert(t->cur_tok);
- t->cur_tok->end_pos = t->pos + 1;
-
- char *token_mem = buf_ptr(t->buf) + t->cur_tok->start_pos;
- int token_len = t->cur_tok->end_pos - t->cur_tok->start_pos;
-
- if (mem_eql_str(token_mem, token_len, "fn")) {
- t->cur_tok->id = TokenIdKeywordFn;
- } else if (mem_eql_str(token_mem, token_len, "return")) {
- t->cur_tok->id = TokenIdKeywordReturn;
- } else if (mem_eql_str(token_mem, token_len, "mut")) {
- t->cur_tok->id = TokenIdKeywordMut;
- } else if (mem_eql_str(token_mem, token_len, "const")) {
- t->cur_tok->id = TokenIdKeywordConst;
- }
-
- t->cur_tok = nullptr;
-}
-
-static ZigList<Token> *tokenize(Buf *buf, ZigList<char *> *include_paths, Buf *cur_dir_path) {
- Tokenize t = {0};
- t.tokens = allocate<ZigList<Token>>(1);
- t.buf = buf;
- t.cur_dir_path = cur_dir_path;
- t.include_paths = include_paths;
- for (t.pos = 0; t.pos < buf_len(t.buf); t.pos += 1) {
- uint8_t c = buf_ptr(t.buf)[t.pos];
- switch (t.state) {
- case TokenizeStateStart:
- switch (c) {
- case WHITESPACE:
- break;
- case ALPHA:
- t.state = TokenizeStateSymbol;
- begin_token(&t, TokenIdSymbol);
- break;
- case DIGIT:
- t.state = TokenizeStateNumber;
- begin_token(&t, TokenIdNumberLiteral);
- break;
- case '"':
- begin_token(&t, TokenIdStringLiteral);
- t.state = TokenizeStateString;
- break;
- case '(':
- begin_token(&t, TokenIdLParen);
- end_token(&t);
- break;
- case ')':
- begin_token(&t, TokenIdRParen);
- end_token(&t);
- break;
- case ',':
- begin_token(&t, TokenIdComma);
- end_token(&t);
- break;
- case '*':
- begin_token(&t, TokenIdStar);
- end_token(&t);
- break;
- case '{':
- begin_token(&t, TokenIdLBrace);
- end_token(&t);
- break;
- case '}':
- begin_token(&t, TokenIdRBrace);
- end_token(&t);
- break;
- case ';':
- begin_token(&t, TokenIdSemicolon);
- end_token(&t);
- break;
- case ':':
- begin_token(&t, TokenIdColon);
- end_token(&t);
- break;
- case '+':
- begin_token(&t, TokenIdPlus);
- end_token(&t);
- break;
- case '-':
- begin_token(&t, TokenIdDash);
- t.state = TokenizeStateSawDash;
- break;
- default:
- tokenize_error(&t, "invalid character: '%c'", c);
- }
- break;
- case TokenizeStateSymbol:
- switch (c) {
- case SYMBOL_CHAR:
- break;
- default:
- t.pos -= 1;
- end_token(&t);
- t.state = TokenizeStateStart;
- continue;
- }
- break;
- case TokenizeStateString:
- switch (c) {
- case '"':
- end_token(&t);
- t.state = TokenizeStateStart;
- break;
- default:
- break;
- }
- break;
- case TokenizeStateNumber:
- switch (c) {
- case DIGIT:
- break;
- default:
- t.pos -= 1;
- end_token(&t);
- t.state = TokenizeStateStart;
- continue;
- }
- break;
- case TokenizeStateSawDash:
- switch (c) {
- case '>':
- t.cur_tok->id = TokenIdArrow;
- end_token(&t);
- t.state = TokenizeStateStart;
- break;
- default:
- end_token(&t);
- t.state = TokenizeStateStart;
- break;
- }
- break;
- }
- if (c == '\n') {
- t.line += 1;
- t.column = 0;
- } else {
- t.column += 1;
- }
- }
- // EOF
- switch (t.state) {
- case TokenizeStateStart:
- break;
- case TokenizeStateSymbol:
- end_token(&t);
- break;
- case TokenizeStateString:
- tokenize_error(&t, "unterminated string");
- break;
- case TokenizeStateNumber:
- end_token(&t);
- break;
- case TokenizeStateSawDash:
- end_token(&t);
- break;
- }
- assert(!t.cur_tok);
- return t.tokens;
-}
-
-static const char * token_name(Token *token) {
- switch (token->id) {
- case TokenIdSymbol: return "Symbol";
- case TokenIdKeywordFn: return "Fn";
- case TokenIdKeywordConst: return "Const";
- case TokenIdKeywordMut: return "Mut";
- case TokenIdKeywordReturn: return "Return";
- case TokenIdLParen: return "LParen";
- case TokenIdRParen: return "RParen";
- case TokenIdComma: return "Comma";
- case TokenIdStar: return "Star";
- case TokenIdLBrace: return "LBrace";
- case TokenIdRBrace: return "RBrace";
- case TokenIdStringLiteral: return "StringLiteral";
- case TokenIdSemicolon: return "Semicolon";
- case TokenIdNumberLiteral: return "NumberLiteral";
- case TokenIdPlus: return "Plus";
- case TokenIdColon: return "Colon";
- case TokenIdArrow: return "Arrow";
- case TokenIdDash: return "Dash";
- }
- return "(invalid token)";
-}
-
-static void print_tokens(Buf *buf, ZigList<Token> *tokens) {
- for (int i = 0; i < tokens->length; i += 1) {
- Token *token = &tokens->at(i);
- printf("%s ", token_name(token));
- fwrite(buf_ptr(buf) + token->start_pos, 1, token->end_pos - token->start_pos, stdout);
- printf("\n");
- }
-}
-
-struct AstNode;
-
-enum NodeType {
- NodeTypeRoot,
- NodeTypeFnDecl,
- NodeTypeParam,
- NodeTypeType,
- NodeTypeBlock,
-};
-
-struct AstNodeFnDecl {
- Buf name;
- ZigList<AstNode *> params;
- AstNode *return_type;
- AstNode *body;
-};
-
-struct AstNodeRoot {
- ZigList<AstNode *> fn_decls;
-};
-
-enum AstNodeTypeType {
- AstNodeTypeTypePrimitive,
- AstNodeTypeTypePointer,
-};
-
-enum AstPrimitiveType {
- AstPrimitiveTypeVoid,
- AstPrimitiveTypeU8,
- AstPrimitiveTypeI8,
- AstPrimitiveTypeU16,
- AstPrimitiveTypeI16,
- AstPrimitiveTypeU32,
- AstPrimitiveTypeI32,
- AstPrimitiveTypeU64,
- AstPrimitiveTypeI64,
- AstPrimitiveTypeUSize,
- AstPrimitiveTypeISize,
- AstPrimitiveTypeF32,
- AstPrimitiveTypeF64,
-};
-
-
-struct AstNodeType {
- AstNodeTypeType type;
- AstPrimitiveType primitive_type;
- AstNode *pointer_type;
- bool is_const;
-};
-
-struct AstNodeParam {
- Buf name;
- AstNode *type;
-};
-
-enum AstState {
- AstStateStart,
- AstStateFn,
- AstStateFnLParen,
- AstStateFnParamName,
- AstStateParamColon,
- AstStateType,
- AstStateTypeEnd,
- AstStateFnParamComma,
- AstStateFnDeclArrow,
- AstStateFnDeclBlock,
- AstStatePointerType,
- AstStateBlock,
-};
-
-struct AstNode {
- enum NodeType type;
- AstNode *parent;
- AstState prev_state;
- union {
- AstNodeRoot root;
- AstNodeFnDecl fn_decl;
- AstNodeType type;
- AstNodeParam param;
- } data;
-};
-
-struct BuildAst {
- Buf *buf;
- AstNode *root;
- AstState state;
- int line;
- int column;
- AstNode *cur_node;
-};
-
-__attribute__ ((format (printf, 2, 3)))
-static void ast_error(BuildAst *b, const char *format, ...) {
- int line = b->line + 1;
- int column = b->column + 1;
+void ast_error(Token *token, const char *format, ...) {
+ int line = token->start_line + 1;
+ int column = token->start_column + 1;
va_list ap;
va_start(ap, format);
@@ -518,37 +63,35 @@ static void ast_error(BuildAst *b, const char *format, ...) {
exit(EXIT_FAILURE);
}
-static AstNode *ast_create_node(BuildAst *b, NodeType type) {
- AstNode *child = allocate<AstNode>(1);
- child->prev_state = b->state;
- child->parent = b->cur_node;
- child->type = type;
- return child;
-}
-
-static void ast_make_node_current(BuildAst *b, AstNode *node) {
- b->cur_node = node;
-}
-
-static void ast_up_stack(BuildAst *b) {
- assert(b->cur_node->parent);
- b->state = b->cur_node->prev_state;
- b->cur_node = b->cur_node->parent;
-}
-
-
static const char *node_type_str(NodeType node_type) {
switch (node_type) {
- case NodeTypeRoot: return "Root";
- case NodeTypeFnDecl: return "FnDecl";
- case NodeTypeParam: return "Param";
- case NodeTypeType: return "Type";
- case NodeTypeBlock: return "Block";
+ case NodeTypeRoot:
+ return "Root";
+ case NodeTypeFnDecl:
+ return "FnDecl";
+ case NodeTypeParamDecl:
+ return "ParamDecl";
+ case NodeTypeType:
+ return "Type";
+ case NodeTypePointerType:
+ return "PointerType";
+ case NodeTypeBlock:
+ return "Block";
+ case NodeTypeStatement:
+ return "Statement";
+ case NodeTypeExpressionStatement:
+ return "ExpressionStatement";
+ case NodeTypeReturnStatement:
+ return "ReturnStatement";
+ case NodeTypeExpression:
+ return "Expression";
+ case NodeTypeFnCall:
+ return "FnCall";
}
zig_panic("unreachable");
}
-static void print_ast(AstNode *node, int indent) {
+static void ast_print(AstNode *node, int indent) {
for (int i = 0; i < indent; i += 1) {
fprintf(stderr, " ");
}
@@ -558,7 +101,7 @@ static void print_ast(AstNode *node, int indent) {
fprintf(stderr, "%s\n", node_type_str(node->type));
for (int i = 0; i < node->data.root.fn_decls.length; i += 1) {
AstNode *child = node->data.root.fn_decls.at(i);
- print_ast(child, indent + 2);
+ ast_print(child, indent + 2);
}
break;
case NodeTypeFnDecl:
@@ -568,276 +111,19 @@ static void print_ast(AstNode *node, int indent) {
for (int i = 0; i < node->data.fn_decl.params.length; i += 1) {
AstNode *child = node->data.fn_decl.params.at(i);
- print_ast(child, indent + 2);
+ ast_print(child, indent + 2);
}
- print_ast(node->data.fn_decl.return_type, indent + 2);
+ ast_print(node->data.fn_decl.return_type, indent + 2);
- print_ast(node->data.fn_decl.body, indent + 2);
+ ast_print(node->data.fn_decl.body, indent + 2);
break;
}
- case NodeTypeParam:
- {
- Buf *name_buf = &node->data.param.name;
- fprintf(stderr, "%s '%s'\n", node_type_str(node->type), buf_ptr(name_buf));
-
- print_ast(node->data.param.type, indent + 2);
- break;
- }
- case NodeTypeType:
+ default:
fprintf(stderr, "%s\n", node_type_str(node->type));
break;
- case NodeTypeBlock:
- fprintf(stderr, "%s\n", node_type_str(node->type));
- break;
- }
-}
-
-static void ast_end_fn_param_list(BuildAst *b) {
- b->state = AstStateFnDeclArrow;
-}
-
-static AstNode *build_ast(Buf *buf, ZigList<Token> *tokens) {
- BuildAst b = {0};
- b.state = AstStateStart;
- b.buf = buf;
- b.root = allocate<AstNode>(1);
- b.root->type = NodeTypeRoot;
- b.cur_node = b.root;
-
- for (int i = 0; i < tokens->length; i += 1) {
- Token *token = &tokens->at(i);
- char *token_mem = buf_ptr(buf) + token->start_pos;
- int token_len = token->end_pos - token->start_pos;
- b.line = token->start_line;
- b.column = token->start_column;
- switch (b.state) {
- case AstStateStart:
- assert(b.cur_node->type == NodeTypeRoot);
- if (token->id == TokenIdKeywordFn) {
- AstNode *child = ast_create_node(&b, NodeTypeFnDecl);
- b.cur_node->data.root.fn_decls.append(child);
- ast_make_node_current(&b, child);
- b.state = AstStateFn;
- } else {
- Buf msg = {0};
- buf_appendf(&msg, "unexpected %s: '", token_name(token));
- buf_append_mem(&msg, token_mem, token_len);
- buf_append_str(&msg, "'");
- ast_error(&b, "%s", buf_ptr(&msg));
- break;
- }
- break;
- case AstStateFn:
- if (token->id != TokenIdSymbol)
- ast_error(&b, "expected symbol");
- buf_init_from_mem(&b.cur_node->data.fn_decl.name, token_mem, token_len);
- b.state = AstStateFnLParen;
- break;
- case AstStateFnLParen:
- if (token->id != TokenIdLParen)
- ast_error(&b, "expected '('");
- b.state = AstStateFnParamName;
- break;
- case AstStateFnParamName:
- switch (token->id) {
- case TokenIdSymbol:
- {
- b.state = AstStateFnParamComma;
- AstNode *child = ast_create_node(&b, NodeTypeParam);
- buf_init_from_mem(&child->data.param.name, token_mem, token_len);
- b.cur_node->data.fn_decl.params.append(child);
- ast_make_node_current(&b, child);
- b.state = AstStateParamColon;
- break;
- }
- case TokenIdRParen:
- ast_end_fn_param_list(&b);
- break;
- default:
- ast_error(&b, "expected parameter name");
- break;
- }
- break;
- case AstStateParamColon:
- {
- if (token->id != TokenIdColon)
- ast_error(&b, "expected ':'");
- assert(b.cur_node->type == NodeTypeParam);
- b.state = AstStateTypeEnd;
- AstNode *child = ast_create_node(&b, NodeTypeType);
- b.cur_node->data.param.type = child;
- ast_make_node_current(&b, child);
- b.state = AstStateType;
- break;
- }
- case AstStateType:
- switch (token->id) {
- case TokenIdSymbol:
- assert(b.cur_node->type == NodeTypeType);
- if (mem_eql_str(token_mem, token_len, "u8")) {
- b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
- b.cur_node->data.type.primitive_type = AstPrimitiveTypeU8;
- ast_up_stack(&b);
- } else if (mem_eql_str(token_mem, token_len, "i8")) {
- b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
- b.cur_node->data.type.primitive_type = AstPrimitiveTypeI8;
- ast_up_stack(&b);
- } else if (mem_eql_str(token_mem, token_len, "u16")) {
- b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
- b.cur_node->data.type.primitive_type = AstPrimitiveTypeU16;
- ast_up_stack(&b);
- } else if (mem_eql_str(token_mem, token_len, "i16")) {
- b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
- b.cur_node->data.type.primitive_type = AstPrimitiveTypeI16;
- ast_up_stack(&b);
- } else if (mem_eql_str(token_mem, token_len, "u32")) {
- b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
- b.cur_node->data.type.primitive_type = AstPrimitiveTypeU32;
- ast_up_stack(&b);
- } else if (mem_eql_str(token_mem, token_len, "i32")) {
- b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
- b.cur_node->data.type.primitive_type = AstPrimitiveTypeI32;
- ast_up_stack(&b);
- } else if (mem_eql_str(token_mem, token_len, "u64")) {
- b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
- b.cur_node->data.type.primitive_type = AstPrimitiveTypeU64;
- ast_up_stack(&b);
- } else if (mem_eql_str(token_mem, token_len, "i64")) {
- b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
- b.cur_node->data.type.primitive_type = AstPrimitiveTypeI64;
- ast_up_stack(&b);
- } else if (mem_eql_str(token_mem, token_len, "usize")) {
- b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
- b.cur_node->data.type.primitive_type = AstPrimitiveTypeUSize;
- ast_up_stack(&b);
- } else if (mem_eql_str(token_mem, token_len, "isize")) {
- b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
- b.cur_node->data.type.primitive_type = AstPrimitiveTypeISize;
- ast_up_stack(&b);
- } else if (mem_eql_str(token_mem, token_len, "f32")) {
- b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
- b.cur_node->data.type.primitive_type = AstPrimitiveTypeF32;
- ast_up_stack(&b);
- } else if (mem_eql_str(token_mem, token_len, "f64")) {
- b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
- b.cur_node->data.type.primitive_type = AstPrimitiveTypeF64;
- ast_up_stack(&b);
- } else {
- Buf msg = {0};
- buf_append_str(&msg, "invalid primitive type: '");
- buf_append_mem(&msg, token_mem, token_len);
- buf_append_str(&msg, "'");
- ast_error(&b, "%s", buf_ptr(&msg));
- }
- break;
- case TokenIdStar:
- b.cur_node->data.type.type = AstNodeTypeTypePointer;
- b.state = AstStatePointerType;
- break;
- default:
- ast_error(&b, "expected type name");
- break;
- }
- break;
- case AstStatePointerType:
- {
- if (token->id == TokenIdKeywordMut) {
- b.cur_node->data.type.is_const = false;
- } else if (token->id == TokenIdKeywordConst) {
- b.cur_node->data.type.is_const = true;
- } else {
- ast_error(&b, "expected 'mut' or 'const'");
- }
- b.state = AstStateTypeEnd;
- AstNode *child = ast_create_node(&b, NodeTypeType);
- b.cur_node->data.type.pointer_type = child;
- ast_make_node_current(&b, child);
- b.state = AstStateType;
- break;
- }
- case AstStateTypeEnd:
- ast_up_stack(&b);
- i -= 1;
- continue;
- case AstStateFnParamComma:
- switch (token->id) {
- case TokenIdComma:
- b.state = AstStateFnParamName;
- break;
- case TokenIdRParen:
- ast_end_fn_param_list(&b);
- break;
- default:
- ast_error(&b, "expected ',' or ')'");
- break;
-
- }
- break;
- case AstStateFnDeclArrow:
- switch (token->id) {
- case TokenIdArrow:
- {
- assert(b.cur_node->type == NodeTypeFnDecl);
- b.state = AstStateFnDeclBlock;
- AstNode *child = ast_create_node(&b, NodeTypeType);
- b.cur_node->data.fn_decl.return_type = child;
- ast_make_node_current(&b, child);
- b.state = AstStateType;
- break;
- }
- case TokenIdLBrace:
- {
- AstNode *node = ast_create_node(&b, NodeTypeType);
- node->data.type.type = AstNodeTypeTypePrimitive;
- node->data.type.primitive_type = AstPrimitiveTypeVoid;
- b.cur_node->data.fn_decl.return_type = node;
-
- b.state = AstStateTypeEnd;
- AstNode *child = ast_create_node(&b, NodeTypeBlock);
- b.cur_node->data.fn_decl.body = child;
- ast_make_node_current(&b, child);
- b.state = AstStateBlock;
- break;
- }
- default:
- ast_error(&b, "expected '->' or '}'");
- break;
- }
- break;
- case AstStateFnDeclBlock:
- {
- if (token->id != TokenIdLBrace)
- ast_error(&b, "expected '{'");
-
- b.state = AstStateTypeEnd;
- AstNode *child = ast_create_node(&b, NodeTypeBlock);
- b.cur_node->data.fn_decl.body = child;
- ast_make_node_current(&b, child);
- b.state = AstStateBlock;
- break;
- }
- case AstStateBlock:
- switch (token->id) {
- case TokenIdSymbol:
- zig_panic("TODO symbol");
- break;
- default:
- {
- Buf msg = {0};
- buf_appendf(&msg, "unexpected %s: '", token_name(token));
- buf_append_mem(&msg, token_mem, token_len);
- buf_append_str(&msg, "'");
- ast_error(&b, "%s", buf_ptr(&msg));
- break;
- }
- }
- break;
- }
}
-
- return b.root;
}
char cur_dir[1024];
@@ -896,14 +182,15 @@ int main(int argc, char **argv) {
fprintf(stderr, "----------------\n");
fprintf(stderr, "%s\n", buf_ptr(in_data));
- ZigList<Token> *tokens = tokenize(in_data, &include_paths, cur_dir_path);
+ ZigList<Token> *tokens = tokenize(in_data, cur_dir_path);
fprintf(stderr, "\nTokens:\n");
fprintf(stderr, "---------\n");
print_tokens(in_data, tokens);
- AstNode *root = build_ast(in_data, tokens);
- print_ast(root, 0);
+ AstNode *root = ast_parse(in_data, tokens);
+ assert(root);
+ ast_print(root, 0);
return EXIT_SUCCESS;
src/parser.hpp
@@ -0,0 +1,87 @@
+#ifndef ZIG_PARSER_HPP
+#define ZIG_PARSER_HPP
+
+#include "list.hpp"
+#include "buffer.hpp"
+#include "tokenizer.hpp"
+
+struct AstNode;
+
+enum NodeType {
+ NodeTypeRoot,
+ NodeTypeFnDecl,
+ NodeTypeParamDecl,
+ NodeTypeType,
+ NodeTypePointerType,
+ NodeTypeBlock,
+ NodeTypeStatement,
+ NodeTypeExpressionStatement,
+ NodeTypeReturnStatement,
+ NodeTypeExpression,
+ NodeTypeFnCall,
+};
+
+struct AstNodeRoot {
+ ZigList<AstNode *> fn_decls;
+};
+
+struct AstNodeFnDecl {
+ Buf name;
+ ZigList<AstNode *> params;
+ AstNode *return_type;
+ AstNode *body;
+};
+
+struct AstNodeParamDecl {
+ Buf name;
+ AstNode *type;
+};
+
+enum AstNodeTypeType {
+ AstNodeTypeTypePrimitive,
+ AstNodeTypeTypePointer,
+};
+
+struct AstNodeType {
+ AstNodeTypeType type;
+ AstNode *child;
+};
+
+struct AstNodePointerType {
+ AstNode *const_or_mut;
+ AstNode *type;
+};
+
+struct AstNodeBlock {
+ ZigList<AstNode *> expressions;
+};
+
+struct AstNodeExpression {
+ AstNode *child;
+};
+
+struct AstNodeFnCall {
+ Buf name;
+ ZigList<AstNode *> params;
+};
+
+struct AstNode {
+ enum NodeType type;
+ AstNode *parent;
+ union {
+ AstNodeRoot root;
+ AstNodeFnDecl fn_decl;
+ AstNodeType type;
+ AstNodeParamDecl param_decl;
+ AstNodeBlock block;
+ AstNodeExpression expression;
+ AstNodeFnCall fn_call;
+ } data;
+};
+
+__attribute__ ((format (printf, 2, 3)))
+void ast_error(Token *token, const char *format, ...);
+
+AstNode * ast_parse(Buf *buf, ZigList<Token> *tokens);
+
+#endif
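For orientation, here is a sketch of how these node structs compose. It hand-builds the AST that `ast_parse` is expected to produce for the call `puts("hello")`, using only `allocate`, `buf_init_from_mem`, and the `ZigList` API already present in this codebase (the string-literal child node is elided for brevity):

```
// Sketch only: AST for the expression `puts("hello")`, built manually.
AstNode *call = allocate<AstNode>(1);
call->type = NodeTypeFnCall;
buf_init_from_mem(&call->data.fn_call.name, "puts", 4);

AstNode *expr = allocate<AstNode>(1);
expr->type = NodeTypeExpression;
expr->data.expression.child = call;
call->parent = expr;
```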
src/parsergen.cpp
@@ -0,0 +1,340 @@
+/*
+ * Copyright (c) 2015 Andrew Kelley
+ *
+ * This file is part of zig, which is MIT licensed.
+ * See http://opensource.org/licenses/MIT
+ */
+
+#include "util.hpp"
+#include "buffer.hpp"
+#include "list.hpp"
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <limits.h>
+
+static Buf *fetch_file(FILE *f) {
+ int fd = fileno(f);
+ struct stat st;
+ if (fstat(fd, &st))
+ zig_panic("unable to stat file: %s", strerror(errno));
+ off_t big_size = st.st_size;
+ if (big_size > INT_MAX)
+ zig_panic("file too big");
+ int size = (int)big_size;
+
+ Buf *buf = buf_alloc_fixed(size);
+ size_t amt_read = fread(buf_ptr(buf), 1, buf_len(buf), f);
+ if (amt_read != (size_t)buf_len(buf))
+ zig_panic("error reading: %s", strerror(errno));
+
+ return buf;
+}
+
+static int usage(const char *arg0) {
+ fprintf(stderr, "Usage: %s in-grammar.txt out-parser.c\n", arg0);
+ return 1;
+}
+
+struct Token {
+ Buf name;
+ int id;
+};
+
+struct RuleNode;
+
+struct RuleTuple {
+ ZigList<RuleNode *> children;
+};
+
+struct RuleMany {
+ RuleNode *child;
+};
+
+struct RuleOption {
+ ZigList<RuleNode *> child;
+};
+
+struct RuleOr {
+ ZigList<RuleTuple *> children;
+};
+
+struct RuleToken {
+ Token *token;
+};
+
+struct RuleBlock {
+ Buf *body;
+};
+
+struct RuleList {
+ RuleNode *rule;
+ RuleToken *separator;
+};
+
+struct RuleSubRule {
+ RuleNode *child;
+};
+
+enum RuleNodeType {
+ RuleNodeTypeTuple,
+ RuleNodeTypeMany,
+ RuleNodeTypeList,
+ RuleNodeTypeOption,
+ RuleNodeTypeOr,
+ RuleNodeTypeToken,
+ RuleNodeTypeSubRule,
+};
+
+struct RuleNode {
+ RuleNodeType type;
+ union {
+ RuleTuple tuple;
+ RuleMany many;
+ RuleList list;
+ RuleOption option;
+ RuleOr _or;
+ RuleToken token;
+ RuleSubRule sub_rule;
+ };
+};
+
+enum ParserStateType {
+ ParserStateTypeError,
+ ParserStateTypeOk,
+};
+
+struct ParserStateError {
+ Buf *msg;
+};
+
+struct ParserState {
+ ParserStateType type;
+ // One for each token ID.
+ ParserState **transition;
+ int index;
+ union {
+ ParserStateError error;
+ };
+};
+
+struct Gen {
+ ParserState *cur_state;
+ ZigList<ParserState *> transition_table;
+ ZigList<Token *> tokens;
+ RuleNode *root;
+};
+
+static ParserState *create_state(Gen *g, ParserStateType type) {
+ ParserState *state = allocate<ParserState>(1);
+ state->type = type;
+ state->index = g->transition_table.length;
+ state->transition = allocate<ParserState*>(g->tokens.length);
+ g->transition_table.append(state);
+ return state;
+}
+
+static void fill_state_with_transition(Gen *g, ParserState *source, ParserState *dest) {
+ for (int i = 0; i < g->tokens.length; i += 1) {
+ source->transition[i] = dest;
+ }
+}
+
+static void gen(Gen *g, RuleNode *node) {
+ switch (node->type) {
+ case RuleNodeTypeToken:
+ {
+ ParserState *ok_state = create_state(g, ParserStateTypeOk);
+ ParserState *err_state = create_state(g, ParserStateTypeError);
+
+ err_state->error.msg = buf_sprintf("expected token '%s'", buf_ptr(&node->token.token->name));
+
+ fill_state_with_transition(g, g->cur_state, err_state);
+ g->cur_state->transition[node->token.token->id] = ok_state;
+ g->cur_state = ok_state;
+ }
+ break;
+ case RuleNodeTypeTuple:
+ {
+ for (int i = 0; i < node->tuple.children.length; i += 1) {
+ RuleNode *child = node->tuple.children.at(i);
+ gen(g, child);
+ }
+ }
+ break;
+ case RuleNodeTypeMany:
+ zig_panic("TODO");
+ break;
+ case RuleNodeTypeList:
+ zig_panic("TODO");
+ break;
+ case RuleNodeTypeOption:
+ zig_panic("TODO");
+ break;
+ case RuleNodeTypeOr:
+ zig_panic("TODO");
+ break;
+ case RuleNodeTypeSubRule:
+ zig_panic("TODO");
+ break;
+ }
+}
+
+static Token *find_token_by_name(Gen *g, Buf *name) {
+ for (int i = 0; i < g->tokens.length; i += 1) {
+ Token *token = g->tokens.at(i);
+ if (buf_eql_buf(name, &token->name))
+ return token;
+ }
+ return nullptr;
+}
+
+static Token *find_or_create_token(Gen *g, Buf *name) {
+ Token *token = find_token_by_name(g, name);
+ if (!token) {
+ token = allocate<Token>(1);
+ token->id = g->tokens.length;
+ buf_init_from_mem(&token->name, buf_ptr(name), buf_len(name));
+ g->tokens.append(token);
+ }
+ return token;
+}
+
+int main(int argc, char **argv) {
+ const char *in_filename = argv[1];
+ const char *out_filename = argv[2];
+
+ if (!in_filename || !out_filename)
+ return usage(argv[0]);
+
+ FILE *in_f;
+ if (strcmp(in_filename, "-") == 0) {
+ in_f = stdin;
+ } else {
+ in_f = fopen(in_filename, "rb");
+ }
+
+ FILE *out_f;
+ if (strcmp(out_filename, "-") == 0) {
+ out_f = stdout;
+ } else {
+ out_f = fopen(out_filename, "wb");
+ }
+
+ if (!in_f || !out_f)
+ zig_panic("unable to open file(s)");
+
+ Buf *in_buf = fetch_file(in_f);
+
+ ZigList<RuleNode *> rules = {0};
+ Gen g = {0};
+
+ //zig_panic("TODO initialize rules");
+ {
+ Token *star_token = find_or_create_token(&g, buf_create_from_str("Star"));
+ Token *lparen_token = find_or_create_token(&g, buf_create_from_str("LParen"));
+ Token *eof_token = find_or_create_token(&g, buf_create_from_str("Eof"));
+
+ RuleNode *root = allocate<RuleNode>(1);
+ root->type = RuleNodeTypeTuple;
+
+ RuleNode *star_node = allocate<RuleNode>(1);
+ star_node->type = RuleNodeTypeToken;
+ star_node->token.token = star_token;
+ root->tuple.children.append(star_node);
+
+ RuleNode *lparen_node = allocate<RuleNode>(1);
+ lparen_node->type = RuleNodeTypeToken;
+ lparen_node->token.token = lparen_token;
+ root->tuple.children.append(lparen_node);
+
+ RuleNode *eof_node = allocate<RuleNode>(1);
+ eof_node->type = RuleNodeTypeToken;
+ eof_node->token.token = eof_token;
+ root->tuple.children.append(eof_node);
+
+ rules.append(root);
+ }
+
+ g.root = rules.at(0);
+
+ g.cur_state = create_state(&g, ParserStateTypeOk);
+ gen(&g, g.root);
+
+
+
+ (void)in_buf;
+
+ fprintf(out_f, "/* This file is auto-generated by parsergen.cpp */\n");
+ fprintf(out_f, "#include \"src/parser.hpp\"\n");
+ fprintf(out_f, "#include <stdio.h>\n");
+
+ fprintf(out_f, "\n");
+ fprintf(out_f, "/*\n");
+ fprintf(out_f, "enum TokenId {\n");
+ for (int i = 0; i < g.tokens.length; i += 1) {
+ Token *token = g.tokens.at(i);
+ fprintf(out_f, " TokenId%s = %d,\n", buf_ptr(&token->name), token->id);
+ }
+ fprintf(out_f, "};\n");
+ fprintf(out_f, "*/\n");
+ for (int i = 0; i < g.tokens.length; i += 1) {
+ Token *token = g.tokens.at(i);
+ fprintf(out_f, "static_assert(TokenId%s == %d, \"wrong token id\");\n",
+ buf_ptr(&token->name), token->id);
+ }
+
+ fprintf(out_f, "AstNode * ast_parse(Buf *buf, ZigList<Token> *tokens) {\n");
+
+ fprintf(out_f, " static const int transition[%d][%d] = {\n", g.transition_table.length, g.tokens.length);
+ for (int state_index = 0; state_index < g.transition_table.length; state_index += 1) {
+ ParserState *state = g.transition_table.at(state_index);
+ fprintf(out_f, " {\n");
+
+ for (int token_id = 0; token_id < g.tokens.length; token_id += 1) {
+ ParserState *dest = state->transition[token_id];
+ fprintf(out_f, " %d,\n", dest ? dest->index : -1);
+ }
+
+ fprintf(out_f, " },\n");
+ }
+ fprintf(out_f, " };\n");
+
+
+ fprintf(out_f, " int state = 0;\n");
+ fprintf(out_f, " AstNode *root = nullptr;\n");
+
+ fprintf(out_f, " for (int i = 0; i < tokens->length; i += 1) {\n");
+ fprintf(out_f, " Token *token = &tokens->at(i);\n");
+ fprintf(out_f, " switch (state) {\n");
+
+ for (int i = 0; i < g.transition_table.length; i += 1) {
+ ParserState *state = g.transition_table.at(i);
+ fprintf(out_f, " case %d:\n", i);
+ fprintf(out_f, " fprintf(stderr, \"state = %%d\\n\", state);\n");
+ switch (state->type) {
+ case ParserStateTypeError:
+ fprintf(out_f, " ast_error(token, \"%s\");\n", buf_ptr(state->error.msg));
+ break;
+ case ParserStateTypeOk:
+ fprintf(out_f, " assert(transition[%d][token->id] >= 0);\n", state->index);
+ fprintf(out_f, " assert(transition[%d][token->id] < %d);\n",
+ state->index, g.transition_table.length);
+ fprintf(out_f, " state = transition[%d][token->id];\n", state->index);
+ break;
+ }
+ fprintf(out_f, " break;\n");
+ }
+ fprintf(out_f, " default:\n");
+ fprintf(out_f, " zig_panic(\"unreachable\");\n");
+
+ fprintf(out_f, " }\n");
+ fprintf(out_f, " }\n");
+ fprintf(out_f, " return root;\n");
+ fprintf(out_f, "}\n");
+
+
+}
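To make the generator's output concrete: for the hard-coded `Star LParen Eof` rule above, state 0 is the initial Ok state and each `token(...)` adds one Ok state and one error state, with every row of the transition table pointing either at the next Ok state or at that token's error state. The emitted `parser.cpp` would look roughly like this (whitespace and the per-state debug `fprintf` condensed; an illustration of the output shape, not a byte-for-byte transcript):

```
/* This file is auto-generated by parsergen.cpp */
#include "src/parser.hpp"
#include <stdio.h>

static_assert(TokenIdStar == 0, "wrong token id");
static_assert(TokenIdLParen == 1, "wrong token id");
static_assert(TokenIdEof == 2, "wrong token id");
AstNode * ast_parse(Buf *buf, ZigList<Token> *tokens) {
    // transition[state][token_id]; -1 marks "no outgoing edge".
    static const int transition[7][3] = {
        { 1,  2,  2},   // state 0: Star -> 1, anything else -> error state 2
        { 4,  3,  4},   // state 1: LParen -> 3, anything else -> error state 4
        {-1, -1, -1},   // state 2: error "expected token 'Star'"
        { 6,  6,  5},   // state 3: Eof -> 5, anything else -> error state 6
        {-1, -1, -1},   // state 4: error "expected token 'LParen'"
        {-1, -1, -1},   // state 5: final Ok state
        {-1, -1, -1},   // state 6: error "expected token 'Eof'"
    };
    int state = 0;
    AstNode *root = nullptr;
    for (int i = 0; i < tokens->length; i += 1) {
        Token *token = &tokens->at(i);
        switch (state) {
            case 0: state = transition[0][token->id]; break;
            case 1: state = transition[1][token->id]; break;
            case 2: ast_error(token, "expected token 'Star'"); break;
            // ... cases 3 through 6 follow the same pattern ...
            default: zig_panic("unreachable");
        }
    }
    return root;
}
```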
src/tokenizer.cpp
@@ -0,0 +1,328 @@
+#include "tokenizer.hpp"
+#include "util.hpp"
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#define WHITESPACE \
+ ' ': \
+ case '\t': \
+ case '\n': \
+ case '\f': \
+ case '\r': \
+ case 0xb
+
+#define DIGIT \
+ '0': \
+ case '1': \
+ case '2': \
+ case '3': \
+ case '4': \
+ case '5': \
+ case '6': \
+ case '7': \
+ case '8': \
+ case '9'
+
+#define ALPHA \
+ 'a': \
+ case 'b': \
+ case 'c': \
+ case 'd': \
+ case 'e': \
+ case 'f': \
+ case 'g': \
+ case 'h': \
+ case 'i': \
+ case 'j': \
+ case 'k': \
+ case 'l': \
+ case 'm': \
+ case 'n': \
+ case 'o': \
+ case 'p': \
+ case 'q': \
+ case 'r': \
+ case 's': \
+ case 't': \
+ case 'u': \
+ case 'v': \
+ case 'w': \
+ case 'x': \
+ case 'y': \
+ case 'z': \
+ case 'A': \
+ case 'B': \
+ case 'C': \
+ case 'D': \
+ case 'E': \
+ case 'F': \
+ case 'G': \
+ case 'H': \
+ case 'I': \
+ case 'J': \
+ case 'K': \
+ case 'L': \
+ case 'M': \
+ case 'N': \
+ case 'O': \
+ case 'P': \
+ case 'Q': \
+ case 'R': \
+ case 'S': \
+ case 'T': \
+ case 'U': \
+ case 'V': \
+ case 'W': \
+ case 'X': \
+ case 'Y': \
+ case 'Z'
+
+#define SYMBOL_CHAR \
+ ALPHA: \
+ case DIGIT: \
+ case '_'
+
+
+struct Tokenize {
+ Buf *buf;
+ int pos;
+ TokenizeState state;
+ ZigList<Token> *tokens;
+ int line;
+ int column;
+ Token *cur_tok;
+ Buf *cur_dir_path;
+};
+
+__attribute__ ((format (printf, 2, 3)))
+static void tokenize_error(Tokenize *t, const char *format, ...) {
+ int line;
+ int column;
+ if (t->cur_tok) {
+ line = t->cur_tok->start_line + 1;
+ column = t->cur_tok->start_column + 1;
+ } else {
+ line = t->line + 1;
+ column = t->column + 1;
+ }
+
+ va_list ap;
+ va_start(ap, format);
+ fprintf(stderr, "Error: Line %d, column %d: ", line, column);
+ vfprintf(stderr, format, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+ exit(EXIT_FAILURE);
+}
+
+static void begin_token(Tokenize *t, TokenId id) {
+ assert(!t->cur_tok);
+ t->tokens->add_one();
+ Token *token = &t->tokens->last();
+ token->start_line = t->line;
+ token->start_column = t->column;
+ token->id = id;
+ token->start_pos = t->pos;
+ t->cur_tok = token;
+}
+
+static void end_token(Tokenize *t) {
+ assert(t->cur_tok);
+ t->cur_tok->end_pos = t->pos + 1;
+
+ char *token_mem = buf_ptr(t->buf) + t->cur_tok->start_pos;
+ int token_len = t->cur_tok->end_pos - t->cur_tok->start_pos;
+
+ if (mem_eql_str(token_mem, token_len, "fn")) {
+ t->cur_tok->id = TokenIdKeywordFn;
+ } else if (mem_eql_str(token_mem, token_len, "return")) {
+ t->cur_tok->id = TokenIdKeywordReturn;
+ } else if (mem_eql_str(token_mem, token_len, "mut")) {
+ t->cur_tok->id = TokenIdKeywordMut;
+ } else if (mem_eql_str(token_mem, token_len, "const")) {
+ t->cur_tok->id = TokenIdKeywordConst;
+ }
+
+ t->cur_tok = nullptr;
+}
+
+ZigList<Token> *tokenize(Buf *buf, Buf *cur_dir_path) {
+ Tokenize t = {0};
+ t.tokens = allocate<ZigList<Token>>(1);
+ t.buf = buf;
+ t.cur_dir_path = cur_dir_path;
+ for (t.pos = 0; t.pos < buf_len(t.buf); t.pos += 1) {
+ uint8_t c = buf_ptr(t.buf)[t.pos];
+ switch (t.state) {
+ case TokenizeStateStart:
+ switch (c) {
+ case WHITESPACE:
+ break;
+ case ALPHA:
+ t.state = TokenizeStateSymbol;
+ begin_token(&t, TokenIdSymbol);
+ break;
+ case DIGIT:
+ t.state = TokenizeStateNumber;
+ begin_token(&t, TokenIdNumberLiteral);
+ break;
+ case '"':
+ begin_token(&t, TokenIdStringLiteral);
+ t.state = TokenizeStateString;
+ break;
+ case '(':
+ begin_token(&t, TokenIdLParen);
+ end_token(&t);
+ break;
+ case ')':
+ begin_token(&t, TokenIdRParen);
+ end_token(&t);
+ break;
+ case ',':
+ begin_token(&t, TokenIdComma);
+ end_token(&t);
+ break;
+ case '*':
+ begin_token(&t, TokenIdStar);
+ end_token(&t);
+ break;
+ case '{':
+ begin_token(&t, TokenIdLBrace);
+ end_token(&t);
+ break;
+ case '}':
+ begin_token(&t, TokenIdRBrace);
+ end_token(&t);
+ break;
+ case ';':
+ begin_token(&t, TokenIdSemicolon);
+ end_token(&t);
+ break;
+ case ':':
+ begin_token(&t, TokenIdColon);
+ end_token(&t);
+ break;
+ case '+':
+ begin_token(&t, TokenIdPlus);
+ end_token(&t);
+ break;
+ case '-':
+ begin_token(&t, TokenIdDash);
+ t.state = TokenizeStateSawDash;
+ break;
+ default:
+ tokenize_error(&t, "invalid character: '%c'", c);
+ }
+ break;
+ case TokenizeStateSymbol:
+ switch (c) {
+ case SYMBOL_CHAR:
+ break;
+ default:
+ t.pos -= 1;
+ end_token(&t);
+ t.state = TokenizeStateStart;
+ continue;
+ }
+ break;
+ case TokenizeStateString:
+ switch (c) {
+ case '"':
+ end_token(&t);
+ t.state = TokenizeStateStart;
+ break;
+ default:
+ break;
+ }
+ break;
+ case TokenizeStateNumber:
+ switch (c) {
+ case DIGIT:
+ break;
+ default:
+ t.pos -= 1;
+ end_token(&t);
+ t.state = TokenizeStateStart;
+ continue;
+ }
+ break;
+ case TokenizeStateSawDash:
+ switch (c) {
+ case '>':
+ t.cur_tok->id = TokenIdArrow;
+ end_token(&t);
+ t.state = TokenizeStateStart;
+ break;
+ default:
+ end_token(&t);
+ t.state = TokenizeStateStart;
+ break;
+ }
+ break;
+ }
+ if (c == '\n') {
+ t.line += 1;
+ t.column = 0;
+ } else {
+ t.column += 1;
+ }
+ }
+ // EOF
+ switch (t.state) {
+ case TokenizeStateStart:
+ break;
+ case TokenizeStateSymbol:
+ end_token(&t);
+ break;
+ case TokenizeStateString:
+ tokenize_error(&t, "unterminated string");
+ break;
+ case TokenizeStateNumber:
+ end_token(&t);
+ break;
+ case TokenizeStateSawDash:
+ end_token(&t);
+ break;
+ }
+ begin_token(&t, TokenIdEof);
+ end_token(&t);
+ assert(!t.cur_tok);
+ return t.tokens;
+}
+
+static const char * token_name(Token *token) {
+ switch (token->id) {
+ case TokenIdEof: return "EOF";
+ case TokenIdSymbol: return "Symbol";
+ case TokenIdKeywordFn: return "Fn";
+ case TokenIdKeywordConst: return "Const";
+ case TokenIdKeywordMut: return "Mut";
+ case TokenIdKeywordReturn: return "Return";
+ case TokenIdLParen: return "LParen";
+ case TokenIdRParen: return "RParen";
+ case TokenIdComma: return "Comma";
+ case TokenIdStar: return "Star";
+ case TokenIdLBrace: return "LBrace";
+ case TokenIdRBrace: return "RBrace";
+ case TokenIdStringLiteral: return "StringLiteral";
+ case TokenIdSemicolon: return "Semicolon";
+ case TokenIdNumberLiteral: return "NumberLiteral";
+ case TokenIdPlus: return "Plus";
+ case TokenIdColon: return "Colon";
+ case TokenIdArrow: return "Arrow";
+ case TokenIdDash: return "Dash";
+ }
+ return "(invalid token)";
+}
+
+void print_tokens(Buf *buf, ZigList<Token> *tokens) {
+ for (int i = 0; i < tokens->length; i += 1) {
+ Token *token = &tokens->at(i);
+ printf("%s ", token_name(token));
+ fwrite(buf_ptr(buf) + token->start_pos, 1, token->end_pos - token->start_pos, stdout);
+ printf("\n");
+ }
+}
+
src/tokenizer.hpp
@@ -0,0 +1,73 @@
+#ifndef ZIG_TOKENIZER_HPP
+#define ZIG_TOKENIZER_HPP
+
+#include "buffer.hpp"
+
+/*
+enum TokenId {
+ TokenIdEof,
+ TokenIdSymbol,
+ TokenIdKeywordFn,
+ TokenIdKeywordReturn,
+ TokenIdKeywordMut,
+ TokenIdKeywordConst,
+ TokenIdLParen,
+ TokenIdRParen,
+ TokenIdComma,
+ TokenIdStar,
+ TokenIdLBrace,
+ TokenIdRBrace,
+ TokenIdStringLiteral,
+ TokenIdSemicolon,
+ TokenIdNumberLiteral,
+ TokenIdPlus,
+ TokenIdColon,
+ TokenIdArrow,
+ TokenIdDash,
+};
+*/
+
+// TODO: temporary debug ordering to match parsergen's hard-coded token ids; delete this and restore the enum above
+enum TokenId {
+ TokenIdStar = 0,
+ TokenIdLParen = 1,
+ TokenIdEof = 2,
+ TokenIdSymbol,
+ TokenIdKeywordFn,
+ TokenIdKeywordReturn,
+ TokenIdKeywordMut,
+ TokenIdKeywordConst,
+ TokenIdRParen,
+ TokenIdComma,
+ TokenIdLBrace,
+ TokenIdRBrace,
+ TokenIdStringLiteral,
+ TokenIdSemicolon,
+ TokenIdNumberLiteral,
+ TokenIdPlus,
+ TokenIdColon,
+ TokenIdArrow,
+ TokenIdDash,
+};
+
+struct Token {
+ TokenId id;
+ int start_pos;
+ int end_pos;
+ int start_line;
+ int start_column;
+};
+
+enum TokenizeState {
+ TokenizeStateStart,
+ TokenizeStateSymbol,
+ TokenizeStateNumber,
+ TokenizeStateString,
+ TokenizeStateSawDash,
+};
+
+ZigList<Token> *tokenize(Buf *buf, Buf *cur_dir_path);
+
+void print_tokens(Buf *buf, ZigList<Token> *tokens);
+
+#endif
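A usage sketch of this interface, showing the token stream for a small input (illustration only; in main.cpp the second argument is the current directory path):

```
// Illustration only: tokenize a small buffer and dump the tokens.
Buf *src = buf_create_from_str("fn main() {}");
ZigList<Token> *tokens = tokenize(src, buf_create_from_str("."));
print_tokens(src, tokens);
// Expected sequence: Fn "fn", Symbol "main", LParen "(", RParen ")",
// LBrace "{", RBrace "}", plus the trailing EOF token that tokenize()
// now appends so the generated parser can match token(EOF).
```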
src/util.hpp
@@ -60,4 +60,12 @@ template<typename T>
static inline T clamp(T min_value, T value, T max_value) {
return max(min(value, max_value), min_value);
}
+
+static inline bool mem_eql_str(const char *mem, size_t mem_len, const char *str) {
+ size_t str_len = strlen(str);
+ if (str_len != mem_len)
+ return false;
+ return memcmp(mem, str, mem_len) == 0;
+}
+
#endif
CMakeLists.txt
@@ -20,10 +20,21 @@ include_directories(
${CMAKE_BINARY_DIR}
)
+set(GRAMMAR_TXT "${CMAKE_BINARY_DIR}/simple.txt")
+set(PARSER_CPP "${CMAKE_BINARY_DIR}/parser.cpp")
+
set(ZIG_SOURCES
"${CMAKE_SOURCE_DIR}/src/main.cpp"
"${CMAKE_SOURCE_DIR}/src/util.cpp"
"${CMAKE_SOURCE_DIR}/src/buffer.cpp"
+ "${CMAKE_SOURCE_DIR}/src/tokenizer.cpp"
+ ${PARSER_CPP}
+)
+
+set(PARSERGEN_SOURCES
+ "${CMAKE_SOURCE_DIR}/src/parsergen.cpp"
+ "${CMAKE_SOURCE_DIR}/src/util.cpp"
+ "${CMAKE_SOURCE_DIR}/src/buffer.cpp"
)
set(CONFIGURE_OUT_FILE "${CMAKE_BINARY_DIR}/config.h")
@@ -49,3 +60,16 @@ target_link_libraries(zig LINK_PUBLIC
${LLVM_LIBRARIES}
)
install(TARGETS zig DESTINATION bin)
+
+add_executable(parsergen ${PARSERGEN_SOURCES})
+set_target_properties(parsergen PROPERTIES
+ LINKER_LANGUAGE C
+ COMPILE_FLAGS ${EXE_CFLAGS})
+
+
+add_custom_command(
+ OUTPUT ${PARSER_CPP}
+ COMMAND parsergen ARGS ${GRAMMAR_TXT} ${PARSER_CPP}
+ DEPENDS ${GRAMMAR_TXT} ${PARSERGEN_SOURCES}
+ WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+)
README.md
@@ -11,7 +11,7 @@ readable, safe, optimal, and concise code to solve any computing problem.
* Creating a C library should be a primary use case. Should be easy to export
an auto-generated .h file.
* Generics such as containers.
- * Do not depend on libc.
+ * Do not depend on libc unless explicitly imported.
* First class error code support.
* Include documentation generator.
* Eliminate the need for make, cmake, etc.
@@ -25,6 +25,7 @@ readable, safe, optimal, and concise code to solve any computing problem.
- Whitespace at the end of line is a compile error.
* Resilient to parsing errors to make IDE integration work well.
* Source code is UTF-8.
+ * Shebang line OK so the language can be used for "scripting" as well.
## Roadmap
@@ -35,3 +36,19 @@ readable, safe, optimal, and concise code to solve any computing problem.
* Unit tests.
* Simple .so library
* How should the Widget use case be solved? In Genesis I'm using C++ and inheritance.
+
+## Grammar
+
+```
+Root : FnDecl*
+FnDecl : TokenFn TokenSymbol TokenLParen list(ParamDecl, TokenComma, 0) TokenRParen (TokenArrow Type)? Block
+ParamDecl : TokenSymbol TokenColon Type
+Type : TokenSymbol | PointerType
+PointerType : TokenStar (TokenConst | TokenMut) Type
+Block : TokenLBrace Statement* Expression? TokenRBrace
+Statement : ExpressionStatement | ReturnStatement
+ExpressionStatement : Expression TokenSemicolon
+ReturnStatement : TokenReturn Expression TokenSemicolon
+Expression : TokenNumber | TokenString | FnCall
+FnCall : TokenSymbol TokenLParen list(Expression, TokenComma, 0) TokenRParen
+```