Commit 303823b6b8

Andrew Kelley <superjoe30@gmail.com>
2015-11-02 11:39:36
building part of the hello world AST
1 parent 34f8d80
Changed files (4)
src/buffer.hpp
@@ -49,16 +49,20 @@ static inline void buf_deinit(Buf *buf) {
     buf->list.deinit();
 }
 
-static inline Buf *buf_from_mem(char *ptr, int len) {
-    Buf *buf = allocate<Buf>(1);
+static inline void buf_init_from_mem(Buf *buf, char *ptr, int len) {
     buf->list.resize(len + 1);
     memcpy(buf_ptr(buf), ptr, len);
     buf->list.at(buf_len(buf)) = 0;
+}
+
+static inline Buf *buf_create_from_mem(char *ptr, int len) {
+    Buf *buf = allocate<Buf>(1);
+    buf_init_from_mem(buf, ptr, len);
     return buf;
 }
 
-static inline Buf *buf_from_str(char *str) {
-    return buf_from_mem(str, strlen(str));
+static inline Buf *buf_create_from_str(char *str) {
+    return buf_create_from_mem(str, strlen(str));
 }
 
 static inline Buf *buf_slice(Buf *in_buf, int start, int end) {
src/main.cpp
@@ -48,6 +48,14 @@ static Buf *fetch_file(FILE *f) {
     return buf;
 }
 
+static inline bool mem_eql_str(const char *mem, size_t mem_len, const char *str) {
+    size_t str_len = strlen(str);
+    if (str_len != mem_len)
+        return false;
+    return memcmp(mem, str, mem_len) == 0;
+}
+
+
 #define WHITESPACE \
     ' ': \
     case '\t': \
@@ -130,6 +138,10 @@ static Buf *fetch_file(FILE *f) {
 
 enum TokenId {
     TokenIdSymbol,
+    TokenIdKeywordFn,
+    TokenIdKeywordReturn,
+    TokenIdKeywordMut,
+    TokenIdKeywordConst,
     TokenIdLParen,
     TokenIdRParen,
     TokenIdComma,
@@ -208,6 +220,20 @@ static void begin_token(Tokenize *t, TokenId id) {
 static void end_token(Tokenize *t) {
     assert(t->cur_tok);
     t->cur_tok->end_pos = t->pos + 1;
+
+    char *token_mem = buf_ptr(t->buf) + t->cur_tok->start_pos;
+    int token_len = t->cur_tok->end_pos - t->cur_tok->start_pos;
+
+    if (mem_eql_str(token_mem, token_len, "fn")) {
+        t->cur_tok->id = TokenIdKeywordFn;
+    } else if (mem_eql_str(token_mem, token_len, "return")) {
+        t->cur_tok->id = TokenIdKeywordReturn;
+    } else if (mem_eql_str(token_mem, token_len, "mut")) {
+        t->cur_tok->id = TokenIdKeywordMut;
+    } else if (mem_eql_str(token_mem, token_len, "const")) {
+        t->cur_tok->id = TokenIdKeywordConst;
+    }
+
     t->cur_tok = nullptr;
 }
 
@@ -357,6 +383,10 @@ static ZigList<Token> *tokenize(Buf *buf, ZigList<char *> *include_paths, Buf *c
 static const char * token_name(Token *token) {
     switch (token->id) {
         case TokenIdSymbol: return "Symbol";
+        case TokenIdKeywordFn: return "Fn";
+        case TokenIdKeywordConst: return "Const";
+        case TokenIdKeywordMut: return "Mut";
+        case TokenIdKeywordReturn: return "Return";
         case TokenIdLParen: return "LParen";
         case TokenIdRParen: return "RParen";
         case TokenIdComma: return "Comma";
@@ -383,17 +413,86 @@ static void print_tokens(Buf *buf, ZigList<Token> *tokens) {
     }
 }
 
+struct AstNode;
+
 enum NodeType {
     NodeTypeRoot,
+    NodeTypeFnDecl,
+    NodeTypeParam,
+    NodeTypeType,
+    NodeTypeBlock,
 };
 
-struct AstNode {
-    enum NodeType type;
-    ZigList<AstNode *> children;
+struct AstNodeFnDecl {
+    Buf name;
+    ZigList<AstNode *> params;
+    AstNode *return_type;
+    AstNode *body;
+};
+
+struct AstNodeRoot {
+    ZigList<AstNode *> fn_decls;
+};
+
+enum AstNodeTypeType {
+    AstNodeTypeTypePrimitive,
+    AstNodeTypeTypePointer,
+};
+
+enum AstPrimitiveType {
+    AstPrimitiveTypeVoid,
+    AstPrimitiveTypeU8,
+    AstPrimitiveTypeI8,
+    AstPrimitiveTypeU16,
+    AstPrimitiveTypeI16,
+    AstPrimitiveTypeU32,
+    AstPrimitiveTypeI32,
+    AstPrimitiveTypeU64,
+    AstPrimitiveTypeI64,
+    AstPrimitiveTypeUSize,
+    AstPrimitiveTypeISize,
+    AstPrimitiveTypeF32,
+    AstPrimitiveTypeF64,
+};
+
+
+struct AstNodeType {
+    AstNodeTypeType type;
+    AstPrimitiveType primitive_type;
+    AstNode *pointer_type;
+    bool is_const;
+};
+
+struct AstNodeParam {
+    Buf name;
+    AstNode *type;
 };
 
 enum AstState {
     AstStateStart,
+    AstStateFn,
+    AstStateFnLParen,
+    AstStateFnParamName,
+    AstStateParamColon,
+    AstStateType,
+    AstStateTypeEnd,
+    AstStateFnParamComma,
+    AstStateFnDeclArrow,
+    AstStateFnDeclBlock,
+    AstStatePointerType,
+    AstStateBlock,
+};
+
+struct AstNode {
+    enum NodeType type;
+    AstNode *parent;
+    AstState prev_state;
+    union {
+        AstNodeRoot root;
+        AstNodeFnDecl fn_decl;
+        AstNodeType type;
+        AstNodeParam param;
+    } data;
 };
 
 struct BuildAst {
@@ -402,6 +501,7 @@ struct BuildAst {
     AstState state;
     int line;
     int column;
+    AstNode *cur_node;
 };
 
 __attribute__ ((format (printf, 2, 3)))
@@ -418,36 +518,320 @@ static void ast_error(BuildAst *b, const char *format, ...) {
     exit(EXIT_FAILURE);
 }
 
-static inline bool mem_eql_str(const char *mem, size_t mem_len, const char *str) {
-    size_t str_len = strlen(str);
-    if (str_len != mem_len)
-        return false;
-    return memcmp(mem, str, mem_len) == 0;
+static AstNode *ast_create_node(BuildAst *b, NodeType type) {
+    AstNode *child = allocate<AstNode>(1);
+    child->prev_state = b->state;
+    child->parent = b->cur_node;
+    child->type = type;
+    return child;
+}
+
+static void ast_make_node_current(BuildAst *b, AstNode *node) {
+    b->cur_node = node;
+}
+
+static void ast_up_stack(BuildAst *b) {
+    assert(b->cur_node->parent);
+    b->state = b->cur_node->prev_state;
+    b->cur_node = b->cur_node->parent;
+}
+
+
+static const char *node_type_str(NodeType node_type) {
+    switch (node_type) {
+        case NodeTypeRoot:      return "Root";
+        case NodeTypeFnDecl:    return "FnDecl";
+        case NodeTypeParam:     return "Param";
+        case NodeTypeType:      return "Type";
+        case NodeTypeBlock:     return "Block";
+    }
+    zig_panic("unreachable");
+}
+
+static void print_ast(AstNode *node, int indent) {
+    for (int i = 0; i < indent; i += 1) {
+        fprintf(stderr, " ");
+    }
+
+    switch (node->type) {
+        case NodeTypeRoot:
+            fprintf(stderr, "%s\n", node_type_str(node->type));
+            for (int i = 0; i < node->data.root.fn_decls.length; i += 1) {
+                AstNode *child = node->data.root.fn_decls.at(i);
+                print_ast(child, indent + 2);
+            }
+            break;
+        case NodeTypeFnDecl:
+            {
+                Buf *name_buf = &node->data.fn_decl.name;
+                fprintf(stderr, "%s '%s'\n", node_type_str(node->type), buf_ptr(name_buf));
+
+                for (int i = 0; i < node->data.fn_decl.params.length; i += 1) {
+                    AstNode *child = node->data.fn_decl.params.at(i);
+                    print_ast(child, indent + 2);
+                }
+
+                print_ast(node->data.fn_decl.return_type, indent + 2);
+
+                print_ast(node->data.fn_decl.body, indent + 2);
+
+                break;
+            }
+        case NodeTypeParam:
+            {
+                Buf *name_buf = &node->data.param.name;
+                fprintf(stderr, "%s '%s'\n", node_type_str(node->type), buf_ptr(name_buf));
+
+                print_ast(node->data.param.type, indent + 2);
+                break;
+            }
+        case NodeTypeType:
+            fprintf(stderr, "%s\n", node_type_str(node->type));
+            break;
+        case NodeTypeBlock:
+            fprintf(stderr, "%s\n", node_type_str(node->type));
+            break;
+    }
 }
 
+static void ast_end_fn_param_list(BuildAst *b) {
+    b->state = AstStateFnDeclArrow;
+}
 
 static AstNode *build_ast(Buf *buf, ZigList<Token> *tokens) {
     BuildAst b = {0};
+    b.state = AstStateStart;
     b.buf = buf;
     b.root = allocate<AstNode>(1);
     b.root->type = NodeTypeRoot;
+    b.cur_node = b.root;
 
     for (int i = 0; i < tokens->length; i += 1) {
         Token *token = &tokens->at(i);
-        const char *token_str = buf_ptr(buf) + token->start_pos;
+        char *token_mem = buf_ptr(buf) + token->start_pos;
         int token_len = token->end_pos - token->start_pos;
         b.line = token->start_line;
         b.column = token->start_column;
         switch (b.state) {
             case AstStateStart:
-                if (mem_eql_str(token_str, token_len, "fn")) {
-                    zig_panic("TODO fn");
+                assert(b.cur_node->type == NodeTypeRoot);
+                if (token->id == TokenIdKeywordFn) {
+                    AstNode *child = ast_create_node(&b, NodeTypeFnDecl);
+                    b.cur_node->data.root.fn_decls.append(child);
+                    ast_make_node_current(&b, child);
+                    b.state = AstStateFn;
                 } else {
                     Buf msg = {0};
-                    buf_append_str(&msg, "unexpected symbol: '");
-                    buf_append_mem(&msg, token_str, token_len);
+                    buf_appendf(&msg, "unexpected %s: '", token_name(token));
+                    buf_append_mem(&msg, token_mem, token_len);
                     buf_append_str(&msg, "'");
                     ast_error(&b, "%s", buf_ptr(&msg));
+                    break;
+                }
+                break;
+            case AstStateFn:
+                if (token->id != TokenIdSymbol)
+                    ast_error(&b, "expected symbol");
+                buf_init_from_mem(&b.cur_node->data.fn_decl.name, token_mem, token_len);
+                b.state = AstStateFnLParen;
+                break;
+            case AstStateFnLParen:
+                if (token->id != TokenIdLParen)
+                    ast_error(&b, "expected '('");
+                b.state = AstStateFnParamName;
+                break;
+            case AstStateFnParamName:
+                switch (token->id) {
+                    case TokenIdSymbol:
+                        {
+                            b.state = AstStateFnParamComma;
+                            AstNode *child = ast_create_node(&b, NodeTypeParam);
+                            buf_init_from_mem(&child->data.param.name, token_mem, token_len);
+                            b.cur_node->data.fn_decl.params.append(child);
+                            ast_make_node_current(&b, child);
+                            b.state = AstStateParamColon;
+                            break;
+                        }
+                    case TokenIdRParen:
+                        ast_end_fn_param_list(&b);
+                        break;
+                    default:
+                        ast_error(&b, "expected parameter name");
+                        break;
+                }
+                break;
+            case AstStateParamColon:
+                {
+                    if (token->id != TokenIdColon)
+                        ast_error(&b, "expected ':'");
+                    assert(b.cur_node->type == NodeTypeParam);
+                    b.state = AstStateTypeEnd;
+                    AstNode *child = ast_create_node(&b, NodeTypeType);
+                    b.cur_node->data.param.type = child;
+                    ast_make_node_current(&b, child);
+                    b.state = AstStateType;
+                    break;
+                }
+            case AstStateType:
+                switch (token->id) {
+                    case TokenIdSymbol:
+                        assert(b.cur_node->type == NodeTypeType);
+                        if (mem_eql_str(token_mem, token_len, "u8")) {
+                            b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
+                            b.cur_node->data.type.primitive_type = AstPrimitiveTypeU8;
+                            ast_up_stack(&b);
+                        } else if (mem_eql_str(token_mem, token_len, "i8")) {
+                            b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
+                            b.cur_node->data.type.primitive_type = AstPrimitiveTypeI8;
+                            ast_up_stack(&b);
+                        } else if (mem_eql_str(token_mem, token_len, "u16")) {
+                            b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
+                            b.cur_node->data.type.primitive_type = AstPrimitiveTypeU16;
+                            ast_up_stack(&b);
+                        } else if (mem_eql_str(token_mem, token_len, "i16")) {
+                            b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
+                            b.cur_node->data.type.primitive_type = AstPrimitiveTypeI16;
+                            ast_up_stack(&b);
+                        } else if (mem_eql_str(token_mem, token_len, "u32")) {
+                            b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
+                            b.cur_node->data.type.primitive_type = AstPrimitiveTypeU32;
+                            ast_up_stack(&b);
+                        } else if (mem_eql_str(token_mem, token_len, "i32")) {
+                            b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
+                            b.cur_node->data.type.primitive_type = AstPrimitiveTypeI32;
+                            ast_up_stack(&b);
+                        } else if (mem_eql_str(token_mem, token_len, "u64")) {
+                            b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
+                            b.cur_node->data.type.primitive_type = AstPrimitiveTypeU64;
+                            ast_up_stack(&b);
+                        } else if (mem_eql_str(token_mem, token_len, "i64")) {
+                            b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
+                            b.cur_node->data.type.primitive_type = AstPrimitiveTypeI64;
+                            ast_up_stack(&b);
+                        } else if (mem_eql_str(token_mem, token_len, "usize")) {
+                            b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
+                            b.cur_node->data.type.primitive_type = AstPrimitiveTypeUSize;
+                            ast_up_stack(&b);
+                        } else if (mem_eql_str(token_mem, token_len, "isize")) {
+                            b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
+                            b.cur_node->data.type.primitive_type = AstPrimitiveTypeISize;
+                            ast_up_stack(&b);
+                        } else if (mem_eql_str(token_mem, token_len, "f32")) {
+                            b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
+                            b.cur_node->data.type.primitive_type = AstPrimitiveTypeF32;
+                            ast_up_stack(&b);
+                        } else if (mem_eql_str(token_mem, token_len, "f64")) {
+                            b.cur_node->data.type.type = AstNodeTypeTypePrimitive;
+                            b.cur_node->data.type.primitive_type = AstPrimitiveTypeF64;
+                            ast_up_stack(&b);
+                        } else {
+                            Buf msg = {0};
+                            buf_append_str(&msg, "invalid primitive type: '");
+                            buf_append_mem(&msg, token_mem, token_len);
+                            buf_append_str(&msg, "'");
+                            ast_error(&b, "%s", buf_ptr(&msg));
+                        }
+                        break;
+                    case TokenIdStar:
+                        b.cur_node->data.type.type = AstNodeTypeTypePointer;
+                        b.state = AstStatePointerType;
+                        break;
+                    default:
+                        ast_error(&b, "expected type name");
+                        break;
+                }
+                break;
+            case AstStatePointerType:
+                {
+                    if (token->id == TokenIdKeywordMut) {
+                        b.cur_node->data.type.is_const = false;
+                    } else if (token->id == TokenIdKeywordConst) {
+                        b.cur_node->data.type.is_const = true;
+                    } else {
+                        ast_error(&b, "expected 'mut' or 'const'");
+                    }
+                    b.state = AstStateTypeEnd;
+                    AstNode *child = ast_create_node(&b, NodeTypeType);
+                    b.cur_node->data.type.pointer_type = child;
+                    ast_make_node_current(&b, child);
+                    b.state = AstStateType;
+                    break;
+                }
+            case AstStateTypeEnd:
+                ast_up_stack(&b);
+                i -= 1;
+                continue;
+            case AstStateFnParamComma:
+                switch (token->id) {
+                    case TokenIdComma:
+                        b.state = AstStateFnParamName;
+                        break;
+                    case TokenIdRParen:
+                        ast_end_fn_param_list(&b);
+                        break;
+                    default:
+                        ast_error(&b, "expected ',' or ')'");
+                        break;
+
+                }
+                break;
+            case AstStateFnDeclArrow:
+                switch (token->id) {
+                    case TokenIdArrow:
+                        {
+                            assert(b.cur_node->type == NodeTypeFnDecl);
+                            b.state = AstStateFnDeclBlock;
+                            AstNode *child = ast_create_node(&b, NodeTypeType);
+                            b.cur_node->data.fn_decl.return_type = child;
+                            ast_make_node_current(&b, child);
+                            b.state = AstStateType;
+                            break;
+                        }
+                    case TokenIdLBrace:
+                        {
+                            AstNode *node = ast_create_node(&b, NodeTypeType);
+                            node->data.type.type = AstNodeTypeTypePrimitive;
+                            node->data.type.primitive_type = AstPrimitiveTypeVoid;
+                            b.cur_node->data.fn_decl.return_type = node;
+
+                            b.state = AstStateTypeEnd;
+                            AstNode *child = ast_create_node(&b, NodeTypeBlock);
+                            b.cur_node->data.fn_decl.body = child;
+                            ast_make_node_current(&b, child);
+                            b.state = AstStateBlock;
+                            break;
+                        }
+                    default:
+                        ast_error(&b, "expected '->' or '}'");
+                        break;
+                }
+                break;
+            case AstStateFnDeclBlock:
+                {
+                    if (token->id != TokenIdLBrace)
+                        ast_error(&b, "expected '{'");
+
+                    b.state = AstStateTypeEnd;
+                    AstNode *child = ast_create_node(&b, NodeTypeBlock);
+                    b.cur_node->data.fn_decl.body = child;
+                    ast_make_node_current(&b, child);
+                    b.state = AstStateBlock;
+                    break;
+                }
+            case AstStateBlock:
+                switch (token->id) {
+                    case TokenIdSymbol:
+                        zig_panic("TODO symbol");
+                        break;
+                    default:
+                        {
+                            Buf msg = {0};
+                            buf_appendf(&msg, "unexpected %s: '", token_name(token));
+                            buf_append_mem(&msg, token_mem, token_len);
+                            buf_append_str(&msg, "'");
+                            ast_error(&b, "%s", buf_ptr(&msg));
+                            break;
+                        }
                 }
                 break;
         }
@@ -456,10 +840,6 @@ static AstNode *build_ast(Buf *buf, ZigList<Token> *tokens) {
     return b.root;
 }
 
-static void print_ast(AstNode *node) {
-    zig_panic("TODO");
-}
-
 char cur_dir[1024];
 
 int main(int argc, char **argv) {
@@ -502,12 +882,12 @@ int main(int argc, char **argv) {
         char *result = getcwd(cur_dir, sizeof(cur_dir));
         if (!result)
             zig_panic("unable to get current working directory: %s", strerror(errno));
-        cur_dir_path = buf_from_str(result);
+        cur_dir_path = buf_create_from_str(result);
     } else {
         in_f = fopen(in_file, "rb");
         if (!in_f)
             zig_panic("unable to open %s for reading: %s\n", in_file, strerror(errno));
-        cur_dir_path = buf_dirname(buf_from_str(in_file));
+        cur_dir_path = buf_dirname(buf_create_from_str(in_file));
     }
 
     Buf *in_data = fetch_file(in_f);
@@ -523,7 +903,7 @@ int main(int argc, char **argv) {
     print_tokens(in_data, tokens);
 
     AstNode *root = build_ast(in_data, tokens);
-    print_ast(root);
+    print_ast(root, 0);
 
 
     return EXIT_SUCCESS;
test/hello.zig
@@ -1,6 +1,6 @@
 
 
-fn main(argc: int, argv: *mut char) -> int {
+fn main(argc: isize, argv: *mut u8) -> isize {
     puts("Hello, world!\n");
     return 0;
 }
README.md
@@ -29,4 +29,9 @@ readable, safe, optimal, and concise code to solve any computing problem.
 ## Roadmap
 
  * Hello, world.
+   - Build AST
+   - Code Gen
+ * C style comments.
+ * Unit tests.
+ * Simple .so library
  * How should the Widget use case be solved? In Genesis I'm using C++ and inheritance.