Commit f6eecfe5f4

Josh Wolfe <thejoshwolfe@gmail.com>
2015-12-07 16:29:19
getting started on array types
1 parent 94e6128
src/analyze.cpp
@@ -48,18 +48,23 @@ static void set_root_export_version(CodeGen *g, Buf *version_buf, AstNode *node)
     }
 }
 
+TypeTableEntry *new_type_table_entry() {
+    TypeTableEntry *fresh = allocate<TypeTableEntry>(1); // every TypeTableEntry is created through this helper
+    fresh->arrays_by_size.init(2); // set up the per-size array-type map (2 is presumably an initial capacity; confirm against HashMap::init)
+    return fresh;
+}
+
 TypeTableEntry *get_pointer_to_type(CodeGen *g, TypeTableEntry *child_type, bool is_const) {
     TypeTableEntry **parent_pointer = is_const ?
         &child_type->pointer_const_parent :
         &child_type->pointer_mut_parent;
-    const char *const_or_mut_str = is_const ? "const" : "mut";
     if (*parent_pointer) {
         return *parent_pointer;
     } else {
-        TypeTableEntry *entry = allocate<TypeTableEntry>(1);
+        TypeTableEntry *entry = new_type_table_entry();
         entry->type_ref = LLVMPointerType(child_type->type_ref, 0);
         buf_resize(&entry->name, 0);
-        buf_appendf(&entry->name, "*%s %s", const_or_mut_str, buf_ptr(&child_type->name));
+        buf_appendf(&entry->name, "*%s %s", is_const ? "const" : "mut", buf_ptr(&child_type->name));
         entry->di_type = LLVMZigCreateDebugPointerType(g->dbuilder, child_type->di_type,
                 g->pointer_size_bytes * 8, g->pointer_size_bytes * 8, buf_ptr(&entry->name));
         g->type_table.put(&entry->name, entry);
@@ -68,6 +73,28 @@ TypeTableEntry *get_pointer_to_type(CodeGen *g, TypeTableEntry *child_type, bool
     }
 }
 
+static TypeTableEntry *get_array_type(CodeGen *g, TypeTableEntry *child_type, int array_size) {
+    auto existing_entry = child_type->arrays_by_size.maybe_get(array_size); // reuse the entry for this (child type, size) pair if one exists
+    if (existing_entry) {
+        return existing_entry->value;
+    } else {
+        TypeTableEntry *entry = new_type_table_entry();
+        entry->type_ref = LLVMArrayType(child_type->type_ref, array_size);
+        buf_resize(&entry->name, 0);
+        buf_appendf(&entry->name, "[%s; %d]", buf_ptr(&child_type->name), array_size); // array_size is a signed int, so %d (was "%ud": %u plus a stray 'd')
+        //entry->di_type = LLVMZigCreateDebugArrayType(g->dbuilder, ..., buf_ptr(&entry->name)); // TODO
+
+        g->type_table.put(&entry->name, entry);
+        child_type->arrays_by_size.put(array_size, entry);
+        return entry;
+    }
+}
+
+static int parse_int(Buf *number) {
+    // TODO: think about integer size of array sizes (atoi reports no error for malformed or out-of-range text)
+    return atoi(buf_ptr(number));
+}
+
 static TypeTableEntry *resolve_type(CodeGen *g, AstNode *node) {
     assert(node->type == NodeTypeType);
     assert(!node->codegen_node);
@@ -98,6 +125,28 @@ static TypeTableEntry *resolve_type(CodeGen *g, AstNode *node) {
                 type_node->entry = get_pointer_to_type(g, child_type, node->data.type.is_const);
                 return type_node->entry;
             }
+        case AstNodeTypeTypeArray:
+            {
+                resolve_type(g, node->data.type.child_type);
+                TypeTableEntry *child_type = node->data.type.child_type->codegen_node->data.type_node.entry;
+                if (child_type == g->builtin_types.entry_unreachable) {
+                    add_node_error(g, node,
+                            buf_create_from_str("array of unreachable not allowed"));
+                }
+
+                AstNode *size_node = node->data.type.array_size;
+                int size; // TODO: think about integer size of array sizes
+                if (size_node->type != NodeTypeNumberLiteral) {
+                    add_node_error(g, size_node,
+                        buf_create_from_str("array size must be literal number"));
+                    size = -1;
+                } else {
+                    size = parse_int(&size_node->data.number);
+                }
+
+                type_node->entry = get_array_type(g, child_type, size); // TODO
+                return type_node->entry;
+            }
     }
     zig_unreachable();
 }
src/analyze.hpp
@@ -18,6 +18,7 @@ struct BlockContext;
 
 void semantic_analyze(CodeGen *g);
 void add_node_error(CodeGen *g, AstNode *node, Buf *msg);
+TypeTableEntry *new_type_table_entry();
 TypeTableEntry *get_pointer_to_type(CodeGen *g, TypeTableEntry *child_type, bool is_const);
 LocalVariableTableEntry *find_local_variable(BlockContext *context, Buf *name);
 
src/codegen.cpp
@@ -781,12 +781,12 @@ static void do_code_gen(CodeGen *g) {
 static void define_primitive_types(CodeGen *g) {
     {
         // if this type is anywhere in the AST, we should never hit codegen.
-        TypeTableEntry *entry = allocate<TypeTableEntry>(1);
+        TypeTableEntry *entry = new_type_table_entry();
         buf_init_from_str(&entry->name, "(invalid)");
         g->builtin_types.entry_invalid = entry;
     }
     {
-        TypeTableEntry *entry = allocate<TypeTableEntry>(1);
+        TypeTableEntry *entry = new_type_table_entry();
         entry->type_ref = LLVMInt1Type();
         buf_init_from_str(&entry->name, "bool");
         entry->di_type = LLVMZigCreateDebugBasicType(g->dbuilder, buf_ptr(&entry->name), 1, 8,
@@ -795,7 +795,7 @@ static void define_primitive_types(CodeGen *g) {
         g->builtin_types.entry_bool = entry;
     }
     {
-        TypeTableEntry *entry = allocate<TypeTableEntry>(1);
+        TypeTableEntry *entry = new_type_table_entry();
         entry->type_ref = LLVMInt8Type();
         buf_init_from_str(&entry->name, "u8");
         entry->di_type = LLVMZigCreateDebugBasicType(g->dbuilder, buf_ptr(&entry->name), 8, 8,
@@ -805,7 +805,7 @@ static void define_primitive_types(CodeGen *g) {
     }
     g->builtin_types.entry_string_literal = get_pointer_to_type(g, g->builtin_types.entry_u8, true);
     {
-        TypeTableEntry *entry = allocate<TypeTableEntry>(1);
+        TypeTableEntry *entry = new_type_table_entry();
         entry->type_ref = LLVMInt32Type();
         buf_init_from_str(&entry->name, "i32");
         entry->di_type = LLVMZigCreateDebugBasicType(g->dbuilder, buf_ptr(&entry->name), 32, 32,
@@ -814,7 +814,7 @@ static void define_primitive_types(CodeGen *g) {
         g->builtin_types.entry_i32 = entry;
     }
     {
-        TypeTableEntry *entry = allocate<TypeTableEntry>(1);
+        TypeTableEntry *entry = new_type_table_entry();
         entry->type_ref = LLVMVoidType();
         buf_init_from_str(&entry->name, "void");
         entry->di_type = LLVMZigCreateDebugBasicType(g->dbuilder, buf_ptr(&entry->name), 0, 0,
@@ -823,7 +823,7 @@ static void define_primitive_types(CodeGen *g) {
         g->builtin_types.entry_void = entry;
     }
     {
-        TypeTableEntry *entry = allocate<TypeTableEntry>(1);
+        TypeTableEntry *entry = new_type_table_entry();
         entry->type_ref = LLVMVoidType();
         buf_init_from_str(&entry->name, "unreachable");
         entry->di_type = g->builtin_types.entry_void->di_type;
src/parser.cpp
@@ -180,6 +180,13 @@ void ast_print(AstNode *node, int indent) {
                         ast_print(node->data.type.child_type, indent + 2);
                         break;
                     }
+                case AstNodeTypeTypeArray:
+                    {
+                        fprintf(stderr, "ArrayType\n");
+                        ast_print(node->data.type.child_type, indent + 2);
+                        ast_print(node->data.type.array_size, indent + 2);
+                        break;
+                    }
             }
             break;
         case NodeTypeReturnExpr:
@@ -448,8 +455,9 @@ static void ast_parse_directives(ParseContext *pc, int *token_index,
 
 
 /*
-Type : token(Symbol) | PointerType | token(Unreachable)
-PointerType : token(Star) token(Const) Type  | token(Star) token(Mut) Type;
+Type : token(Symbol) | token(Unreachable) | token(Void) | PointerType | ArrayType
+PointerType : token(Star) (token(Const) | token(Mut)) Type
+ArrayType : token(LBracket) Type token(Semicolon) Expression token(RBracket)
 */
 static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token_index) {
     Token *token = &pc->tokens->at(token_index);
@@ -463,12 +471,6 @@ static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token
     } else if (token->id == TokenIdKeywordVoid) {
         node->data.type.type = AstNodeTypeTypePrimitive;
         buf_init_from_str(&node->data.type.primitive_name, "void");
-    } else if (token->id == TokenIdKeywordTrue) {
-        node->data.type.type = AstNodeTypeTypePrimitive;
-        buf_init_from_str(&node->data.type.primitive_name, "true");
-    } else if (token->id == TokenIdKeywordFalse) {
-        node->data.type.type = AstNodeTypeTypePrimitive;
-        buf_init_from_str(&node->data.type.primitive_name, "false");
     } else if (token->id == TokenIdSymbol) {
         node->data.type.type = AstNodeTypeTypePrimitive;
         ast_buf_from_token(pc, token, &node->data.type.primitive_name);
@@ -485,6 +487,20 @@ static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token
         }
 
         node->data.type.child_type = ast_parse_type(pc, token_index, &token_index);
+    } else if (token->id == TokenIdLBracket) {
+        node->data.type.type = AstNodeTypeTypeArray;
+
+        node->data.type.child_type = ast_parse_type(pc, token_index, &token_index);
+
+        Token *semicolon_token = &pc->tokens->at(token_index);
+        token_index += 1;
+        ast_expect_token(pc, semicolon_token, TokenIdSemicolon);
+
+        node->data.type.array_size = ast_parse_expression(pc, &token_index, true);
+
+        Token *rbracket_token = &pc->tokens->at(token_index);
+        token_index += 1;
+        ast_expect_token(pc, rbracket_token, TokenIdRBracket);
     } else {
         ast_invalid_token_error(pc, token);
     }
@@ -494,8 +510,7 @@ static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token
 }
 
 /*
-ParamDecl<node> : token(Symbol) token(Colon) Type {
-};
+ParamDecl : token(Symbol) token(Colon) Type
 */
 static AstNode *ast_parse_param_decl(ParseContext *pc, int token_index, int *new_token_index) {
     Token *param_name = &pc->tokens->at(token_index);
src/parser.hpp
@@ -81,12 +81,14 @@ struct AstNodeParamDecl {
 enum AstNodeTypeType {
     AstNodeTypeTypePrimitive,
     AstNodeTypeTypePointer,
+    AstNodeTypeTypeArray, // array type node: uses child_type and array_size
 };
 
 struct AstNodeType {
     AstNodeTypeType type;
     Buf primitive_name;
     AstNode *child_type;
+    AstNode *array_size; // size expression node; set by the parser for AstNodeTypeTypeArray
     bool is_const;
 };
 
src/semantic_info.hpp
@@ -23,8 +23,11 @@ struct TypeTableEntry {
     bool pointer_is_const;
     int user_defined_id;
     Buf name;
+
+    // use these fields to make sure we don't duplicate type table entries for the same type
     TypeTableEntry *pointer_const_parent;
     TypeTableEntry *pointer_mut_parent;
+    HashMap<int, TypeTableEntry *, int_hash, int_eq> arrays_by_size;
 };
 
 struct ImportTableEntry {
src/tokenizer.cpp
@@ -259,6 +259,14 @@ void tokenize(Buf *buf, Tokenization *out) {
                         begin_token(&t, TokenIdRBrace);
                         end_token(&t);
                         break;
+                    case '[':
+                        begin_token(&t, TokenIdLBracket);
+                        end_token(&t);
+                        break;
+                    case ']':
+                        begin_token(&t, TokenIdRBracket);
+                        end_token(&t);
+                        break;
                     case ';':
                         begin_token(&t, TokenIdSemicolon);
                         end_token(&t);
@@ -601,6 +609,8 @@ static const char * token_name(Token *token) {
         case TokenIdStar: return "Star";
         case TokenIdLBrace: return "LBrace";
         case TokenIdRBrace: return "RBrace";
+        case TokenIdLBracket: return "LBracket";
+        case TokenIdRBracket: return "RBracket";
         case TokenIdStringLiteral: return "StringLiteral";
         case TokenIdSemicolon: return "Semicolon";
         case TokenIdNumberLiteral: return "NumberLiteral";
src/tokenizer.hpp
@@ -36,6 +36,8 @@ enum TokenId {
     TokenIdStar,
     TokenIdLBrace,
     TokenIdRBrace,
+    TokenIdLBracket,
+    TokenIdRBracket,
     TokenIdStringLiteral,
     TokenIdSemicolon,
     TokenIdNumberLiteral,
src/util.cpp
@@ -19,3 +19,10 @@ void zig_panic(const char *format, ...) {
     va_end(ap);
     abort();
 }
+
+uint32_t int_hash(int i) {
+    return static_cast<uint32_t>(i); // value conversion (mod 2^32) instead of reading the int through a uint32_t pointer
+}
+bool int_eq(int a, int b) {
+    return a == b; // equality predicate used together with int_hash for int-keyed HashMaps
+}
src/util.hpp
@@ -9,6 +9,7 @@
 #define ZIG_UTIL_HPP
 
 #include <stdlib.h>
+#include <stdint.h>
 #include <string.h>
 #include <assert.h>
 
@@ -78,4 +79,7 @@ static inline bool mem_eql_str(const char *mem, size_t mem_len, const char *str)
     return memcmp(mem, str, mem_len) == 0;
 }
 
+uint32_t int_hash(int i);
+bool int_eq(int a, int b);
+
 #endif
README.md
@@ -144,9 +144,11 @@ ParamDeclList : token(LParen) list(ParamDecl, token(Comma)) token(RParen)
 
 ParamDecl : token(Symbol) token(Colon) Type
 
-Type : token(Symbol) | PointerType | token(Unreachable)
+Type : token(Symbol) | token(Unreachable) | token(Void) | PointerType | ArrayType
 
-PointerType : token(Star) token(Const) Type | token(Star) token(Mut) Type
+PointerType : token(Star) (token(Const) | token(Mut)) Type
+
+ArrayType : token(LBracket) Type token(Semicolon) Expression token(RBracket)
 
 Block : token(LBrace) list(option(Statement), token(Semicolon)) token(RBrace)