Commit 3c43bc9208

Andrew Kelley <superjoe30@gmail.com>
2016-01-06 09:28:58
support unknown size arrays
1 parent 4ef062b
doc/langref.md
@@ -72,7 +72,7 @@ PointerType : token(Ampersand) option(token(Const)) Type
 
 MaybeType : token(Question) Type
 
-ArrayType : token(LBracket) option(Expression) token(RBracket) Type
+ArrayType : token(LBracket) option(Expression) token(RBracket) option(token(Const)) Type
 
 Block : token(LBrace) list(option(Statement), token(Semicolon)) token(RBrace)
 
example/arrays/arrays.zig
@@ -19,9 +19,18 @@ pub fn main(argc: isize, argv: &&u8, env: &&u8) -> i32 {
         i += 1;
     }
 
-    if (accumulator == 15) {
-        print_str("OK\n");
+    if (accumulator != 15) {
+        print_str("BAD\n");
     }
 
+    if (get_array_len(array) != 5) {
+        print_str("BAD\n");
+    }
+
+    print_str("OK\n");
     return 0;
 }
+
+// Reports the number of elements in an unknown size array by reading its `len` field.
+fn get_array_len(a: []u32) -> usize {
+    return a.len;
+}
example/rand/main.zig
@@ -7,8 +7,10 @@ const ARRAY_SIZE : u16 = 624;
 
 /// Use `rand_init` to initialize this state.
 struct Rand {
-    array: [u32; ARRAY_SIZE],
-    index: #typeof(ARRAY_SIZE),
+    // TODO use ARRAY_SIZE here
+    array: [624]u32,
+    // TODO use ARRAY_SIZE here
+    index: #typeof(624),
 
     /// Get 32 bits of randomness.
     pub fn get_u32(r: &Rand) -> u32 {
@@ -31,10 +33,11 @@ struct Rand {
     pub fn get_bytes(r: &Rand, buf: []u8) {
         var bytes_left = r.get_bytes_aligned(buf);
         if (bytes_left > 0) {
-            var rand_val_array : [u8; #sizeof(u32)];
+            var rand_val_array : [#sizeof(u32)]u8;
             *(rand_val_array.ptr as &u32) = r.get_u32();
             while (bytes_left > 0) {
-                buf[buf.len - bytes_left] = rand_val_array[#sizeof(u32) - bytes_left];
+                // TODO array index operator so we can remove the .ptr
+                buf.ptr[buf.len - bytes_left] = rand_val_array[#sizeof(u32) - bytes_left];
                 bytes_left -= 1;
             }
         }
@@ -46,7 +49,7 @@ struct Rand {
         const range = end - start;
         const leftover = #max_value(u64) % range;
         const upper_bound = #max_value(u64) - leftover;
-        var rand_val_array : [u8; #sizeof(u64)];
+        var rand_val_array : [#sizeof(u64)]u8;
 
         while (true) {
             r.get_bytes_aligned(rand_val_array);
@@ -79,7 +82,8 @@ struct Rand {
     fn get_bytes_aligned(r: &Rand, buf: []u8) -> usize {
         var bytes_left = buf.len;
         while (bytes_left > 4) {
-            *(&buf[buf.len - bytes_left] as &u32) = r.get_u32();
+            // TODO: array access so we can remove .ptr
+            *(&buf.ptr[buf.len - bytes_left] as &u32) = r.get_u32();
             bytes_left -= #sizeof(u32);
         }
         return bytes_left;
src/analyze.cpp
@@ -219,7 +219,7 @@ static TypeTableEntry *get_array_type(CodeGen *g, ImportTableEntry *import,
         TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdArray);
         entry->type_ref = LLVMArrayType(child_type->type_ref, array_size);
         buf_resize(&entry->name, 0);
-        buf_appendf(&entry->name, "[%s; %" PRIu64 "]", buf_ptr(&child_type->name), array_size);
+        buf_appendf(&entry->name, "[%" PRIu64 "]%s", array_size, buf_ptr(&child_type->name));
 
         entry->size_in_bits = child_type->size_in_bits * array_size;
         entry->align_in_bits = child_type->align_in_bits;
@@ -235,6 +235,55 @@ static TypeTableEntry *get_array_type(CodeGen *g, ImportTableEntry *import,
     }
 }
 
+// Returns the unknown size array type for `child_type`, creating it on first use.
+//
+// The result is cached on `child_type` itself, with one slot for the const variant and one
+// for the mutable variant, so repeated requests yield the same TypeTableEntry.
+//
+// The type is modelled as a non-packed struct with two fields:
+//   ptr: pointer to `child_type` (a const pointer when `is_const` is true)
+//   len: usize
+//
+// `import` is currently unused; it is kept so the signature parallels get_array_type.
+static TypeTableEntry *get_unknown_size_array_type(CodeGen *g, ImportTableEntry *import,
+        TypeTableEntry *child_type, bool is_const)
+{
+    TypeTableEntry **parent_pointer = is_const ?
+        &child_type->unknown_size_array_const_parent :
+        &child_type->unknown_size_array_mut_parent;
+    if (*parent_pointer) {
+        // Already created for this child type and constness.
+        return *parent_pointer;
+    }
+
+    TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdStruct);
+
+    // Include the constness in the name so that the const and mutable variants of the same
+    // child type are distinguishable in diagnostics, LLVM type names and debug info.
+    buf_resize(&entry->name, 0);
+    buf_appendf(&entry->name, "[]%s%s", is_const ? "const " : "", buf_ptr(&child_type->name));
+    entry->type_ref = LLVMStructCreateNamed(LLVMGetGlobalContext(), buf_ptr(&entry->name));
+
+    TypeTableEntry *pointer_type = get_pointer_to_type(g, child_type, is_const);
+
+    unsigned element_count = 2;
+    LLVMTypeRef element_types[] = {
+        pointer_type->type_ref,
+        g->builtin_types.entry_usize->type_ref,
+    };
+    LLVMStructSetBody(entry->type_ref, element_types, element_count, false);
+
+    // Two pointer-sized members, aligned to pointer size.
+    entry->size_in_bits = g->pointer_size_bytes * 2 * 8;
+    entry->align_in_bits = g->pointer_size_bytes * 8;
+    entry->data.structure.is_packed = false;
+    entry->data.structure.is_unknown_size_array = true;
+    entry->data.structure.field_count = element_count;
+    entry->data.structure.fields = allocate<TypeStructField>(element_count);
+    entry->data.structure.fields[0].name = buf_create_from_str("ptr");
+    entry->data.structure.fields[0].type_entry = pointer_type;
+    entry->data.structure.fields[1].name = buf_create_from_str("len");
+    entry->data.structure.fields[1].type_entry = g->builtin_types.entry_usize;
+
+    LLVMZigDIType *di_element_types[] = {
+        pointer_type->di_type,
+        g->builtin_types.entry_usize->di_type,
+    };
+    LLVMZigDIScope *compile_unit_scope = LLVMZigCompileUnitToScope(g->compile_unit);
+    entry->di_type = LLVMZigCreateDebugStructType(g->dbuilder, compile_unit_scope,
+            buf_ptr(&entry->name), g->dummy_di_file, 0, entry->size_in_bits, entry->align_in_bits, 0,
+            nullptr, di_element_types, element_count, 0, nullptr, "");
+
+    *parent_pointer = entry;
+    return entry;
+}
+
 static TypeTableEntry *eval_const_expr(CodeGen *g, BlockContext *context,
         AstNode *node, AstNodeNumberLiteral *out_number_literal)
 {
@@ -313,38 +362,47 @@ static TypeTableEntry *resolve_type(CodeGen *g, AstNode *node, ImportTableEntry
             }
         case AstNodeTypeTypeArray:
             {
-                resolve_type(g, node->data.type.child_type, import, context);
-                TypeTableEntry *child_type = node->data.type.child_type->codegen_node->data.type_node.entry;
+                TypeTableEntry *child_type = resolve_type(g, node->data.type.child_type, import, context);
                 if (child_type->id == TypeTableEntryIdUnreachable) {
                     add_node_error(g, node,
                             buf_create_from_str("array of unreachable not allowed"));
-                }
-
-                AstNode *size_node = node->data.type.array_size;
-                TypeTableEntry *size_type = analyze_expression(g, import, context,
-                        g->builtin_types.entry_usize, size_node);
-                if (size_type->id == TypeTableEntryIdInvalid) {
                     type_node->entry = g->builtin_types.entry_invalid;
                     return type_node->entry;
                 }
 
-                AstNodeNumberLiteral number_literal;
-                TypeTableEntry *resolved_type = eval_const_expr(g, context, size_node, &number_literal);
+                AstNode *size_node = node->data.type.array_size;
 
-                if (resolved_type->id == TypeTableEntryIdInt) {
-                    if (resolved_type->data.integral.is_signed) {
-                        add_node_error(g, size_node,
-                            buf_create_from_str("array size must be unsigned integer"));
+                if (size_node) {
+                    TypeTableEntry *size_type = analyze_expression(g, import, context,
+                            g->builtin_types.entry_usize, size_node);
+                    if (size_type->id == TypeTableEntryIdInvalid) {
                         type_node->entry = g->builtin_types.entry_invalid;
+                        return type_node->entry;
+                    }
+
+                    AstNodeNumberLiteral number_literal;
+                    TypeTableEntry *resolved_type = eval_const_expr(g, context, size_node, &number_literal);
+
+                    if (resolved_type->id == TypeTableEntryIdInt) {
+                        if (resolved_type->data.integral.is_signed) {
+                            add_node_error(g, size_node,
+                                buf_create_from_str("array size must be unsigned integer"));
+                            type_node->entry = g->builtin_types.entry_invalid;
+                        } else {
+                            type_node->entry = get_array_type(g, import, child_type, number_literal.data.x_uint);
+                        }
                     } else {
-                        type_node->entry = get_array_type(g, import, child_type, number_literal.data.x_uint);
+                        add_node_error(g, size_node,
+                            buf_create_from_str("unable to resolve constant expression"));
+                        type_node->entry = g->builtin_types.entry_invalid;
                     }
+                    return type_node->entry;
                 } else {
-                    add_node_error(g, size_node,
-                        buf_create_from_str("unable to resolve constant expression"));
-                    type_node->entry = g->builtin_types.entry_invalid;
+                    type_node->entry = get_unknown_size_array_type(g, import, child_type,
+                            node->data.type.is_const);
+                    return type_node->entry;
                 }
-                return type_node->entry;
+
             }
         case AstNodeTypeTypeMaybe:
             {
@@ -1016,13 +1074,14 @@ static TypeTableEntry *resolve_type_compatibility(CodeGen *g, BlockContext *cont
         return expected_type;
     }
 
-    // implicit constant sized array to string conversion
-    if (expected_type == g->builtin_types.entry_string &&
+    // implicit constant sized array to unknown size array conversion
+    if (expected_type->id == TypeTableEntryIdStruct &&
+        expected_type->data.structure.is_unknown_size_array &&
         actual_type->id == TypeTableEntryIdArray &&
-        actual_type->data.array.child_type == g->builtin_types.entry_u8)
+        actual_type->data.array.child_type == expected_type->data.structure.fields[0].type_entry->data.pointer.child_type)
     {
         node->codegen_node->expr_node.implicit_cast.after_type = expected_type;
-        node->codegen_node->expr_node.implicit_cast.op = CastOpArrayToString;
+        node->codegen_node->expr_node.implicit_cast.op = CastOpToUnknownSizeArray;
         node->codegen_node->expr_node.implicit_cast.source_node = node;
         context->cast_expr_alloca_list.append(&node->codegen_node->expr_node.implicit_cast);
         return expected_type;
@@ -1292,11 +1351,12 @@ static TypeTableEntry *analyze_cast_expr(CodeGen *g, ImportTableEntry *import, B
     {
         cast_node->op = CastOpIntWidenOrShorten;
         return wanted_type;
-    } else if (wanted_type == g->builtin_types.entry_string &&
-                actual_type->id == TypeTableEntryIdArray &&
-                actual_type->data.array.child_type == g->builtin_types.entry_u8)
+    } else if (wanted_type->id == TypeTableEntryIdStruct &&
+               wanted_type->data.structure.is_unknown_size_array &&
+               actual_type->id == TypeTableEntryIdArray &&
+               // fields[0] is the `ptr` field: compare the array's element type against the
+               // pointee type, not against the pointer type itself.
+               actual_type->data.array.child_type ==
+                   wanted_type->data.structure.fields[0].type_entry->data.pointer.child_type)
     {
-        cast_node->op = CastOpArrayToString;
+        cast_node->op = CastOpToUnknownSizeArray;
         context->cast_expr_alloca_list.append(cast_node);
         return wanted_type;
     } else if (actual_type->id == TypeTableEntryIdNumberLiteral &&
src/analyze.hpp
@@ -46,6 +46,7 @@ struct TypeTableEntryStruct {
     TypeStructField *fields;
     uint64_t size_bytes;
     bool is_invalid; // true if any fields are invalid
+    bool is_unknown_size_array;
     // reminder: hash tables must be initialized before use
     HashMap<Buf *, FnTableEntry *, buf_hash, buf_eql_buf> fn_table;
 
@@ -100,6 +101,8 @@ struct TypeTableEntry {
     TypeTableEntry *pointer_mut_parent;
     HashMap<uint64_t, TypeTableEntry *, uint64_hash, uint64_eq> arrays_by_size;
     TypeTableEntry *maybe_parent;
+    TypeTableEntry *unknown_size_array_const_parent;
+    TypeTableEntry *unknown_size_array_mut_parent;
 
 };
 
@@ -175,7 +178,6 @@ struct CodeGen {
         TypeTableEntry *entry_f32;
         TypeTableEntry *entry_f64;
         TypeTableEntry *entry_c_string_literal;
-        TypeTableEntry *entry_string;
         TypeTableEntry *entry_void;
         TypeTableEntry *entry_unreachable;
         TypeTableEntry *entry_invalid;
@@ -283,7 +285,7 @@ enum CastOp {
     CastOpNothing,
     CastOpPtrToInt,
     CastOpIntWidenOrShorten,
-    CastOpArrayToString,
+    CastOpToUnknownSizeArray,
     CastOpMaybeWrap,
     CastOpPointerReinterpret,
 };
src/codegen.cpp
@@ -462,14 +462,17 @@ static LLVMValueRef gen_bare_cast(CodeGen *g, AstNode *node, LLVMValueRef expr_v
                 add_debug_source_node(g, node);
                 return LLVMBuildTrunc(g->builder, expr_val, wanted_type->type_ref, "");
             }
-        case CastOpArrayToString:
+        case CastOpToUnknownSizeArray:
             {
                 assert(cast_node->ptr);
 
+                TypeTableEntry *pointer_type = wanted_type->data.structure.fields[0].type_entry;
+
                 add_debug_source_node(g, node);
 
                 LLVMValueRef ptr_ptr = LLVMBuildStructGEP(g->builder, cast_node->ptr, 0, "");
-                LLVMBuildStore(g->builder, expr_val, ptr_ptr);
+                LLVMValueRef expr_bitcast = LLVMBuildBitCast(g->builder, expr_val, pointer_type->type_ref, "");
+                LLVMBuildStore(g->builder, expr_bitcast, ptr_ptr);
 
                 LLVMValueRef len_ptr = LLVMBuildStructGEP(g->builder, cast_node->ptr, 1, "");
                 LLVMValueRef len_val = LLVMConstInt(g->builtin_types.entry_usize->type_ref,
@@ -1925,41 +1928,6 @@ static void define_builtin_types(CodeGen *g) {
         entry->di_type = g->builtin_types.entry_void->di_type;
         g->builtin_types.entry_unreachable = entry;
     }
-    {
-        TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdStruct);
-
-        TypeTableEntry *const_pointer_to_u8 = get_pointer_to_type(g, g->builtin_types.entry_u8, true);
-
-        unsigned element_count = 2;
-        LLVMTypeRef element_types[] = {
-            const_pointer_to_u8->type_ref,
-            g->builtin_types.entry_usize->type_ref
-        };
-        entry->type_ref = LLVMStructCreateNamed(LLVMGetGlobalContext(), "string");
-        LLVMStructSetBody(entry->type_ref, element_types, element_count, false);
-
-        buf_init_from_str(&entry->name, "string");
-        entry->size_in_bits = g->pointer_size_bytes * 2 * 8;
-        entry->align_in_bits = g->pointer_size_bytes;
-        entry->data.structure.is_packed = false;
-        entry->data.structure.field_count = element_count;
-        entry->data.structure.fields = allocate<TypeStructField>(element_count);
-        entry->data.structure.fields[0].name = buf_create_from_str("ptr");
-        entry->data.structure.fields[0].type_entry = const_pointer_to_u8;
-        entry->data.structure.fields[1].name = buf_create_from_str("len");
-        entry->data.structure.fields[1].type_entry = g->builtin_types.entry_usize;
-
-        LLVMZigDIType *di_element_types[] = {
-            const_pointer_to_u8->di_type,
-            g->builtin_types.entry_usize->di_type
-        };
-        LLVMZigDIScope *compile_unit_scope = LLVMZigCompileUnitToScope(g->compile_unit);
-        entry->di_type = LLVMZigCreateDebugStructType(g->dbuilder, compile_unit_scope,
-                "string", g->dummy_di_file, 0, entry->size_in_bits, entry->align_in_bits, 0,
-                nullptr, di_element_types, element_count, 0, nullptr, "");
-
-        g->builtin_types.entry_string = entry;
-    }
 }
 
 
@@ -2103,7 +2071,6 @@ static ImportTableEntry *codegen_add_code(CodeGen *g, Buf *abs_full_path,
     import_entry->type_table.put(&g->builtin_types.entry_f64->name, g->builtin_types.entry_f64);
     import_entry->type_table.put(&g->builtin_types.entry_void->name, g->builtin_types.entry_void);
     import_entry->type_table.put(&g->builtin_types.entry_unreachable->name, g->builtin_types.entry_unreachable);
-    import_entry->type_table.put(&g->builtin_types.entry_string->name, g->builtin_types.entry_string);
 
     import_entry->root = ast_parse(source_code, tokenization.tokens, import_entry, g->err_color);
     assert(import_entry->root);
src/parser.cpp
@@ -219,7 +219,7 @@ void ast_print(AstNode *node, int indent) {
                     }
                 case AstNodeTypeTypePointer:
                     {
-                        const char *const_or_mut_str = node->data.type.is_const ? "const" : "mut";
+                        const char *const_or_mut_str = node->data.type.is_const ? "const" : "var";
                         fprintf(stderr, "'%s' PointerType\n", const_or_mut_str);
 
                         ast_print(node->data.type.child_type, indent + 2);
@@ -227,9 +227,11 @@ void ast_print(AstNode *node, int indent) {
                     }
                 case AstNodeTypeTypeArray:
                     {
-                        fprintf(stderr, "ArrayType\n");
+                        const char *const_or_mut_str = node->data.type.is_const ? "const" : "var";
+                        fprintf(stderr, "'%s' ArrayType\n", const_or_mut_str);
+                        if (node->data.type.array_size)
+                            ast_print(node->data.type.array_size, indent + 2);
                         ast_print(node->data.type.child_type, indent + 2);
-                        ast_print(node->data.type.array_size, indent + 2);
                         break;
                     }
                 case AstNodeTypeTypeMaybe:
@@ -1107,6 +1109,12 @@ static AstNode *ast_parse_type(ParseContext *pc, int *token_index) {
 
         ast_eat_token(pc, token_index, TokenIdRBracket);
 
+        Token *const_tok = &pc->tokens->at(*token_index);
+        if (const_tok->id == TokenIdKeywordConst) {
+            *token_index += 1;
+            node->data.type.is_const = true;
+        }
+
         node->data.type.child_type = ast_parse_type(pc, token_index);
     } else {
         ast_invalid_token_error(pc, token);
std/std.zig
@@ -39,13 +39,13 @@ pub fn os_get_random_bytes(buf: &u8, count: usize) -> isize {
 
 // TODO error handling
 // TODO handle buffering and flushing (mutex protected)
-pub fn print_str(str: string) -> isize {
+pub fn print_str(str: []const u8) -> isize {
     fprint_str(stdout_fileno, str)
 }
 
 // TODO error handling
 // TODO handle buffering and flushing (mutex protected)
-pub fn fprint_str(fd: isize, str: string) -> isize {
+pub fn fprint_str(fd: isize, str: []const u8) -> isize {
     write(fd, str.ptr, str.len)
 }
 
@@ -73,6 +73,9 @@ fn digit_to_char(digit: u64) -> u8 {
 
 const max_u64_base10_digits: usize = 20;
 
+// TODO use an array for out_buf instead of a pointer. This should give bounds checking in
+// debug mode, and the length can get optimized out in release mode. Requires array slicing
+// syntax for the buf_print_u64 call.
 fn buf_print_i64(out_buf: &u8, x: i64) -> usize {
     if (x < 0) {
         out_buf[0] = '-';
@@ -82,6 +85,7 @@ fn buf_print_i64(out_buf: &u8, x: i64) -> usize {
     }
 }
 
+// TODO use an array for out_buf instead of pointer.
 fn buf_print_u64(out_buf: &u8, x: u64) -> usize {
     var buf: [max_u64_base10_digits]u8;
     var a = x;