commit 3b4a2afb65 (parent a22bc8d)
Author: Andrew Kelley <superjoe30@gmail.com>
Date:   2015-11-24 06:47:25

    semantic analysis checks for multiple definitions of functions
src/buffer.hpp
@@ -149,5 +149,14 @@ static inline Buf *buf_dirname(Buf *buf) {
     return buf_create_from_mem("", 0);
 }
 
+static inline uint32_t buf_hash(Buf *buf) {
+    // 32-bit FNV-1a hash
+    uint32_t h = 2166136261;
+    for (int i = 0; i < buf_len(buf); i += 1) {
+        h = h ^ ((uint8_t)buf->list.at(i));
+        h = h * 16777619;
+    }
+    return h;
+}
 
 #endif
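
Note: buf_hash hashes the buffer's bytes, so two distinct Buf pointers with
equal contents get the same hash and compare equal. A minimal sketch of that
property, assuming buf_create_from_str and buf_eql_buf from buffer.hpp behave
as their names suggest (the asserts are illustrative only):

    Buf *a = buf_create_from_str("print");
    Buf *b = buf_create_from_str("print");
    assert(a != b);                      // two separate allocations
    assert(buf_hash(a) == buf_hash(b));  // same bytes, same FNV-1a hash
    assert(buf_eql_buf(a, b));           // and the equality fn agrees

This content-based hashing is what lets the HashMap introduced below key
function declarations by name.
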
src/codegen.cpp
@@ -0,0 +1,87 @@
+#include "codegen.hpp"
+#include "hash_map.hpp"
+
+#include <stdio.h>
+
+struct CodeGen {
+    AstNode *root;
+    HashMap<Buf *, AstNode *, buf_hash, buf_eql_buf> fn_decls;
+    ZigList<ErrorMsg> errors;
+};
+
+CodeGen *create_codegen(AstNode *root) {
+    CodeGen *g = allocate<CodeGen>(1);
+    g->root = root;
+    g->fn_decls.init(32);
+    return g;
+}
+
+static void add_node_error(CodeGen *g, AstNode *node, Buf *msg) {
+    g->errors.add_one();
+    ErrorMsg *last_msg = &g->errors.last();
+    last_msg->line_start = node->line;
+    last_msg->column_start = node->column;
+    last_msg->line_end = -1;
+    last_msg->column_end = -1;
+    last_msg->msg = msg;
+}
+
+static void analyze_node(CodeGen *g, AstNode *node) {
+    switch (node->type) {
+        case NodeTypeRoot:
+            for (int i = 0; i < node->data.root.fn_decls.length; i += 1) {
+                AstNode *child = node->data.root.fn_decls.at(i);
+                analyze_node(g, child);
+            }
+            break;
+        case NodeTypeFnDecl:
+            {
+                auto entry = g->fn_decls.maybe_get(&node->data.fn_decl.name);
+                if (entry) {
+                    add_node_error(g, node,
+                            buf_sprintf("redefinition of '%s'", buf_ptr(&node->data.fn_decl.name)));
+                } else {
+                    g->fn_decls.put(&node->data.fn_decl.name, node);
+                    for (int i = 0; i < node->data.fn_decl.params.length; i += 1) {
+                        AstNode *child = node->data.fn_decl.params.at(i);
+                        analyze_node(g, child);
+                    }
+                    analyze_node(g, node->data.fn_decl.return_type);
+                    analyze_node(g, node->data.fn_decl.body);
+                }
+                break;
+            }
+        case NodeTypeParamDecl:
+            analyze_node(g, node->data.param_decl.type);
+            break;
+        case NodeTypeType:
+            break;
+        case NodeTypePointerType:
+            break;
+        case NodeTypeBlock:
+            break;
+        case NodeTypeStatement:
+            break;
+        case NodeTypeExpressionStatement:
+            break;
+        case NodeTypeReturnStatement:
+            break;
+        case NodeTypeExpression:
+            break;
+        case NodeTypeFnCall:
+            break;
+    }
+}
+
+void semantic_analyze(CodeGen *g) {
+    // Pass 1: walk the AST, record function declarations, and report redefinitions.
+    analyze_node(g, g->root);
+}
+
+void code_gen(CodeGen *g) {
+
+}
+
+ZigList<ErrorMsg> *codegen_error_messages(CodeGen *g) {
+    return &g->errors;
+}
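
The redefinition check in analyze_node reduces to a probe-then-insert against
fn_decls: maybe_get returns NULL for an unseen name and a live entry for a
duplicate. A stripped-down sketch of the same pattern (first_decl is a
hypothetical AstNode pointer used only for illustration):

    HashMap<Buf *, AstNode *, buf_hash, buf_eql_buf> fn_decls;
    fn_decls.init(32);

    Buf *name = buf_create_from_str("foo");
    if (fn_decls.maybe_get(name)) {
        // duplicate: report "redefinition of 'foo'"
    } else {
        fn_decls.put(name, first_decl);
    }
    // a second declaration named "foo" would now take the maybe_get branch

In analyze_node itself the keys are pointers into the AST nodes
(&node->data.fn_decl.name), so no name strings are copied.
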
src/codegen.hpp
@@ -0,0 +1,25 @@
+#ifndef ZIG_CODEGEN_HPP
+#define ZIG_CODEGEN_HPP
+
+#include "parser.hpp"
+
+struct CodeGen;
+
+struct ErrorMsg {
+    int line_start;
+    int column_start;
+    int line_end;
+    int column_end;
+    Buf *msg;
+};
+
+
+CodeGen *create_codegen(AstNode *root);
+
+void semantic_analyze(CodeGen *g);
+
+void code_gen(CodeGen *g);
+
+ZigList<ErrorMsg> *codegen_error_messages(CodeGen *g);
+
+#endif
src/hash_map.hpp
@@ -0,0 +1,220 @@
+#ifndef ZIG_HASH_MAP_HPP
+#define ZIG_HASH_MAP_HPP
+
+#include "util.hpp"
+
+#include <stdint.h>
+
+template<typename K, typename V, uint32_t (*HashFunction)(K key), bool (*EqualFn)(K a, K b)>
+class HashMap {
+public:
+    void init(int capacity) {
+        init_capacity(capacity);
+    }
+    void deinit(void) {
+        free(_entries);
+    }
+
+    struct Entry {
+        bool used;
+        int distance_from_start_index;
+        K key;
+        V value;
+    };
+
+    void clear() {
+        for (int i = 0; i < _capacity; i += 1) {
+            _entries[i].used = false;
+        }
+        _size = 0;
+        _max_distance_from_start_index = 0;
+        _modification_count += 1;
+    }
+
+    int size() const {
+        return _size;
+    }
+
+    void put(const K &key, const V &value) {
+        _modification_count += 1;
+        internal_put(key, value);
+
+        // if we get too full (80%), double the capacity
+        if (_size * 5 >= _capacity * 4) {
+            Entry *old_entries = _entries;
+            int old_capacity = _capacity;
+            init_capacity(_capacity * 2);
+            // dump all of the old elements into the new table
+            for (int i = 0; i < old_capacity; i += 1) {
+                Entry *old_entry = &old_entries[i];
+                if (old_entry->used)
+                    internal_put(old_entry->key, old_entry->value);
+            }
+            free(old_entries);
+        }
+    }
+
+    const V &get(const K &key) const {
+        Entry *entry = internal_get(key);
+        if (!entry)
+            zig_panic("key not found");
+        return entry->value;
+    }
+
+    Entry *maybe_get(const K &key) const {
+        return internal_get(key);
+    }
+
+    void remove(const K &key) {
+        _modification_count += 1;
+        int start_index = key_to_index(key);
+        for (int roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
+            int index = (start_index + roll_over) % _capacity;
+            Entry *entry = &_entries[index];
+
+            if (!entry->used)
+                zig_panic("key not found");
+
+            if (!EqualFn(entry->key, key))
+                continue;
+
+            for (; roll_over < _capacity; roll_over += 1) {
+                int next_index = (start_index + roll_over + 1) % _capacity;
+                Entry *next_entry = &_entries[next_index];
+                if (!next_entry->used || next_entry->distance_from_start_index == 0) {
+                    entry->used = false;
+                    _size -= 1;
+                    return;
+                }
+                *entry = *next_entry;
+                entry->distance_from_start_index -= 1;
+                entry = next_entry;
+            }
+            zig_panic("shifting everything in the table");
+        }
+        zig_panic("key not found");
+    }
+
+    class Iterator {
+    public:
+        Entry *next() {
+            if (_initial_modification_count != _table->_modification_count)
+                zig_panic("concurrent modification");
+            if (_count >= _table->size())
+                return NULL;
+            for (; _index < _table->_capacity; _index += 1) {
+                Entry *entry = &_table->_entries[_index];
+                if (entry->used) {
+                    _index += 1;
+                    _count += 1;
+                    return entry;
+                }
+            }
+            zig_panic("no next item");
+        }
+    private:
+        const HashMap * _table;
+        // how many items have we returned
+        int _count = 0;
+        // current index into the entry array
+        int _index = 0;
+        // used to detect concurrent modification
+        uint32_t _initial_modification_count;
+        Iterator(const HashMap * table) :
+                _table(table), _initial_modification_count(table->_modification_count) {
+        }
+        friend HashMap;
+    };
+
+    // you must not modify the underlying HashMap while this iterator is still in use
+    Iterator entry_iterator() const {
+        return Iterator(this);
+    }
+
+private:
+
+    Entry *_entries;
+    int _capacity;
+    int _size;
+    int _max_distance_from_start_index;
+    // this is used to detect bugs where a hashtable is edited while an iterator is running.
+    uint32_t _modification_count = 0;
+
+    void init_capacity(int capacity) {
+        _capacity = capacity;
+        _entries = allocate<Entry>(_capacity);
+        _size = 0;
+        _max_distance_from_start_index = 0;
+        for (int i = 0; i < _capacity; i += 1) {
+            _entries[i].used = false;
+        }
+    }
+
+    void internal_put(K key, V value) {
+        int start_index = key_to_index(key);
+        for (int roll_over = 0, distance_from_start_index = 0;
+                roll_over < _capacity; roll_over += 1, distance_from_start_index += 1)
+        {
+            int index = (start_index + roll_over) % _capacity;
+            Entry *entry = &_entries[index];
+
+            if (entry->used && !EqualFn(entry->key, key)) {
+                if (entry->distance_from_start_index < distance_from_start_index) {
+                    // Robin Hood: the incoming entry has probed farther, so it steals this slot
+                    Entry tmp = *entry;
+                    if (distance_from_start_index > _max_distance_from_start_index)
+                        _max_distance_from_start_index = distance_from_start_index;
+                    *entry = {
+                        true,
+                        distance_from_start_index,
+                        key,
+                        value,
+                    };
+                    key = tmp.key;
+                    value = tmp.value;
+                    distance_from_start_index = tmp.distance_from_start_index;
+                }
+                continue;
+            }
+
+            if (!entry->used) {
+                // adding a new entry; otherwise we are overwriting the
+                // old value for an existing key
+                _size += 1;
+            }
+
+            if (distance_from_start_index > _max_distance_from_start_index)
+                _max_distance_from_start_index = distance_from_start_index;
+            *entry = {
+                true,
+                distance_from_start_index,
+                key,
+                value,
+            };
+            return;
+        }
+        zig_panic("put into a full HashMap");
+    }
+
+
+    Entry *internal_get(const K &key) const {
+        int start_index = key_to_index(key);
+        for (int roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
+            int index = (start_index + roll_over) % _capacity;
+            Entry *entry = &_entries[index];
+
+            if (!entry->used)
+                return NULL;
+
+            if (EqualFn(entry->key, key))
+                return entry;
+        }
+        return NULL;
+    }
+
+    int key_to_index(const K &key) const {
+        return (int)(HashFunction(key) % ((uint32_t)_capacity));
+    }
+};
+
+#endif
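
The HashMap above is an open-addressed table with Robin Hood probing: on
insert, an entry that has probed farther from its ideal slot displaces one
that sits closer, and lookups scan at most _max_distance_from_start_index
slots past the ideal index. A minimal smoke test of the API; int_hash and
int_eq are hypothetical helpers written only for this sketch:

    #include "hash_map.hpp"

    static uint32_t int_hash(int x) { return (uint32_t)x; }
    static bool int_eq(int a, int b) { return a == b; }

    static void hash_map_smoke_test(void) {
        HashMap<int, const char *, int_hash, int_eq> map;
        map.init(4);
        map.put(1, "one");
        map.put(2, "two");
        map.put(1, "uno");   // same key: value overwritten, size stays 2
        auto it = map.entry_iterator();
        for (auto *entry = it.next(); entry; entry = it.next()) {
            // entry->key and entry->value are valid here
        }
        map.remove(2);
        map.deinit();
    }
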
src/main.cpp
@@ -12,6 +12,7 @@
 #include "parser.hpp"
 #include "tokenizer.hpp"
 #include "error.hpp"
+#include "codegen.hpp"
 
 #include <stdio.h>
 #include <string.h>
@@ -75,24 +76,41 @@ static int build(const char *arg0, const char *in_file, const char *out_file, Zi
         cur_dir_path = buf_dirname(buf_create_from_str(in_file));
     }
 
-    Buf *in_data = fetch_file(in_f);
-
     fprintf(stderr, "Original source:\n");
     fprintf(stderr, "----------------\n");
+    Buf *in_data = fetch_file(in_f);
     fprintf(stderr, "%s\n", buf_ptr(in_data));
 
-    ZigList<Token> *tokens = tokenize(in_data, cur_dir_path);
-
     fprintf(stderr, "\nTokens:\n");
     fprintf(stderr, "---------\n");
+    ZigList<Token> *tokens = tokenize(in_data, cur_dir_path);
     print_tokens(in_data, tokens);
 
-    AstNode *root = ast_parse(in_data, tokens);
-    assert(root);
     fprintf(stderr, "\nAST:\n");
     fprintf(stderr, "------\n");
+    AstNode *root = ast_parse(in_data, tokens);
+    assert(root);
     ast_print(root, 0);
 
+    fprintf(stderr, "\nSemantic Analysis:\n");
+    fprintf(stderr, "--------------------\n");
+    CodeGen *codegen = create_codegen(root);
+    semantic_analyze(codegen);
+    ZigList<ErrorMsg> *errors = codegen_error_messages(codegen);
+    if (errors->length == 0) {
+        fprintf(stderr, "OK\n");
+    } else {
+        for (int i = 0; i < errors->length; i += 1) {
+            ErrorMsg *err = &errors->at(i);
+            fprintf(stderr, "Error: Line %d, column %d: %s\n", err->line_start, err->column_start,
+                    buf_ptr(err->msg));
+        }
+        return 1;
+    }
+
+    fprintf(stderr, "\nCode Generation:\n");
+    fprintf(stderr, "------------------\n");
+    code_gen(codegen);
 
     return 0;
 }
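
With this wired in, a source file that declares the same function name twice
now stops at the semantic-analysis step instead of reaching code generation.
For a hypothetical input whose second declaration of foo the parser records at
line 5, column 1, the tail of the driver's output would look like this (the
line and column numbers are illustrative, not from a real run):

    Semantic Analysis:
    --------------------
    Error: Line 5, column 1: redefinition of 'foo'

and build() returns 1 without printing the "Code Generation" header.
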
src/parser.cpp
@@ -133,9 +133,11 @@ struct ParseContext {
     ZigList<Token> *tokens;
 };
 
-static AstNode *ast_create_node(NodeType type) {
+static AstNode *ast_create_node(NodeType type, Token *first_token) {
     AstNode *node = allocate<AstNode>(1);
     node->type = type;
+    node->line = first_token->start_line;
+    node->column = first_token->start_column;
     return node;
 }
 
@@ -163,11 +165,11 @@ Type : token(Symbol) | PointerType;
 PointerType : token(Star) token(Const) Type  | token(Star) token(Mut) Type;
 */
 static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token_index) {
-    AstNode *node = ast_create_node(NodeTypeType);
-
     Token *token = &pc->tokens->at(token_index);
     token_index += 1;
 
+    AstNode *node = ast_create_node(NodeTypeType, token);
+
     if (token->id == TokenIdSymbol) {
         node->data.type.type = AstNodeTypeTypePrimitive;
         ast_buf_from_token(pc, token, &node->data.type.primitive_name);
@@ -197,12 +199,13 @@ ParamDecl<node> : token(Symbol) token(Colon) Type {
 };
 */
 static AstNode *ast_parse_param_decl(ParseContext *pc, int token_index, int *new_token_index) {
-    AstNode *node = ast_create_node(NodeTypeParamDecl);
-
     Token *param_name = &pc->tokens->at(token_index);
     token_index += 1;
     ast_expect_token(pc, param_name, TokenIdSymbol);
 
+    AstNode *node = ast_create_node(NodeTypeParamDecl, param_name);
+
+
     ast_buf_from_token(pc, param_name, &node->data.param_decl.name);
 
     Token *colon = &pc->tokens->at(token_index);
@@ -280,12 +283,13 @@ static void ast_parse_fn_call_param_list(ParseContext *pc, int token_index, int
 FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ;
 */
 static AstNode *ast_parse_fn_call(ParseContext *pc, int token_index, int *new_token_index) {
-    AstNode *node = ast_create_node(NodeTypeFnCall);
-
     Token *fn_name = &pc->tokens->at(token_index);
     token_index += 1;
     ast_expect_token(pc, fn_name, TokenIdSymbol);
 
+    AstNode *node = ast_create_node(NodeTypeFnCall, fn_name);
+
+
     ast_buf_from_token(pc, fn_name, &node->data.fn_call.name);
 
     ast_parse_fn_call_param_list(pc, token_index, &token_index, &node->data.fn_call.params);
@@ -295,9 +299,8 @@ static AstNode *ast_parse_fn_call(ParseContext *pc, int token_index, int *new_to
 }
 
 static AstNode *ast_parse_expression(ParseContext *pc, int token_index, int *new_token_index) {
-    AstNode *node = ast_create_node(NodeTypeExpression);
-
     Token *token = &pc->tokens->at(token_index);
+    AstNode *node = ast_create_node(NodeTypeExpression, token);
     if (token->id == TokenIdSymbol) {
         node->data.expression.type = AstNodeExpressionTypeFnCall;
         node->data.expression.data.fn_call = ast_parse_fn_call(pc, token_index, &token_index);
@@ -329,9 +332,9 @@ Expression : token(Number)  | token(String)  | FnCall ;
 FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ;
 */
 static AstNode *ast_parse_statement(ParseContext *pc, int token_index, int *new_token_index) {
-    AstNode *node = ast_create_node(NodeTypeStatement);
-
     Token *token = &pc->tokens->at(token_index);
+    AstNode *node = ast_create_node(NodeTypeStatement, token);
+
     if (token->id == TokenIdKeywordReturn) {
         token_index += 1;
         node->data.statement.type = AstNodeStatementTypeReturn;
@@ -362,12 +365,13 @@ static AstNode *ast_parse_statement(ParseContext *pc, int token_index, int *new_
 Block : token(LBrace) many(Statement) token(RBrace);
 */
 static AstNode *ast_parse_block(ParseContext *pc, int token_index, int *new_token_index) {
-    AstNode *node = ast_create_node(NodeTypeBlock);
-
     Token *l_brace = &pc->tokens->at(token_index);
     token_index += 1;
     ast_expect_token(pc, l_brace, TokenIdLBrace);
 
+    AstNode *node = ast_create_node(NodeTypeBlock, l_brace);
+
+
     for (;;) {
         Token *token = &pc->tokens->at(token_index);
         if (token->id == TokenIdRBrace) {
@@ -386,12 +390,13 @@ static AstNode *ast_parse_block(ParseContext *pc, int token_index, int *new_toke
 FnDecl : token(Fn) token(Symbol) ParamDeclList option(token(Arrow) Type) Block;
 */
 static AstNode *ast_parse_fn_decl(ParseContext *pc, int token_index, int *new_token_index) {
-    AstNode *node = ast_create_node(NodeTypeFnDecl);
-
     Token *fn_token = &pc->tokens->at(token_index);
     token_index += 1;
     ast_expect_token(pc, fn_token, TokenIdKeywordFn);
 
+    AstNode *node = ast_create_node(NodeTypeFnDecl, fn_token);
+
+
     Token *fn_name = &pc->tokens->at(token_index);
     token_index += 1;
     ast_expect_token(pc, fn_name, TokenIdSymbol);
@@ -437,7 +442,7 @@ static void ast_parse_fn_decl_list(ParseContext *pc, int token_index, ZigList<As
 AstNode *ast_parse(Buf *buf, ZigList<Token> *tokens) {
     ParseContext pc = {0};
     pc.buf = buf;
-    pc.root = ast_create_node(NodeTypeRoot);
+    pc.root = ast_create_node(NodeTypeRoot, &tokens->at(0));
     pc.tokens = tokens;
 
     int new_token_index;
src/parser.hpp
@@ -97,6 +97,8 @@ struct AstNodeFnCall {
 struct AstNode {
     enum NodeType type;
     AstNode *parent;
+    int line;
+    int column;
     union {
         AstNodeRoot root;
         AstNodeFnDecl fn_decl;
CMakeLists.txt
@@ -29,6 +29,7 @@ set(ZIG_SOURCES
     "${CMAKE_SOURCE_DIR}/src/parser.cpp"
     "${CMAKE_SOURCE_DIR}/src/tokenizer.cpp"
     "${CMAKE_SOURCE_DIR}/src/util.cpp"
+    "${CMAKE_SOURCE_DIR}/src/codegen.cpp"
 )
 
 set(CONFIGURE_OUT_FILE "${CMAKE_BINARY_DIR}/config.h")