Commit 925c805d4b

Andrew Kelley <superjoe30@gmail.com>
2015-11-24 21:37:14
add unreachable expression
now creating .o file from hello.zig correctly
1 parent c2e5d50
src/codegen.cpp
@@ -26,9 +26,14 @@ struct CodeGen {
     HashMap<Buf *, LLVMValueRef, buf_hash, buf_eql_buf> str_table;
 };
 
+struct TypeNode {
+    LLVMTypeRef type_ref;
+    bool is_unreachable;
+};
+
 struct CodeGenNode {
     union {
-        LLVMTypeRef type_ref; // for NodeTypeType
+        TypeNode type_node; // for NodeTypeType
     } data;
 };
 
@@ -54,9 +59,16 @@ static void add_node_error(CodeGen *g, AstNode *node, Buf *msg) {
 static LLVMTypeRef to_llvm_type(AstNode *type_node) {
     assert(type_node->type == NodeTypeType);
     assert(type_node->codegen_node);
-    assert(type_node->codegen_node->data.type_ref);
+    assert(type_node->codegen_node->data.type_node.type_ref);
 
-    return type_node->codegen_node->data.type_ref;
+    return type_node->codegen_node->data.type_node.type_ref;
+}
+
+
+static bool type_is_unreachable(AstNode *type_node) {
+    assert(type_node->type == NodeTypeType);
+    return type_node->data.type.type == AstNodeTypeTypePrimitive &&
+            buf_eql_str(&type_node->data.type.primitive_name, "unreachable");
 }
 
 static void analyze_node(CodeGen *g, AstNode *node) {
@@ -83,13 +95,18 @@ static void analyze_node(CodeGen *g, AstNode *node) {
                     AstNode *param_type = param_node->data.param_decl.type;
                     fn_param_values[param_i] = to_llvm_type(param_type);
                 }
-                LLVMTypeRef return_type = to_llvm_type(fn_proto->data.fn_proto.return_type);
+                AstNode *return_type_node = fn_proto->data.fn_proto.return_type;
+                LLVMTypeRef return_type = to_llvm_type(return_type_node);
 
                 LLVMTypeRef fn_type = LLVMFunctionType(return_type, fn_param_values, params->length, 0);
                 LLVMValueRef fn_val = LLVMAddFunction(g->mod, buf_ptr(name), fn_type);
                 LLVMSetLinkage(fn_val, LLVMExternalLinkage);
                 LLVMSetFunctionCallConv(fn_val, LLVMCCallConv);
 
+                if (type_is_unreachable(return_type_node)) {
+                    LLVMAddFunctionAttr(fn_val, LLVMNoReturnAttribute);
+                }
+
                 FnTableEntry *fn_table_entry = allocate<FnTableEntry>(1);
                 fn_table_entry->fn_value = fn_val;
                 fn_table_entry->proto_node = fn_proto;
@@ -131,33 +148,43 @@ static void analyze_node(CodeGen *g, AstNode *node) {
             analyze_node(g, node->data.param_decl.type);
             break;
         case NodeTypeType:
-            node->codegen_node = allocate<CodeGenNode>(1);
-            switch (node->data.type.type) {
-                case AstNodeTypeTypePrimitive:
-                    {
-                        Buf *name = &node->data.type.primitive_name;
-                        if (buf_eql_str(name, "u8")) {
-                            node->codegen_node->data.type_ref = LLVMInt8Type();
-                        } else if (buf_eql_str(name, "i32")) {
-                            node->codegen_node->data.type_ref = LLVMInt32Type();
-                        } else if (buf_eql_str(name, "void")) {
-                            node->codegen_node->data.type_ref = LLVMVoidType();
-                        } else {
-                            add_node_error(g, node,
-                                    buf_sprintf("invalid type name: '%s'", buf_ptr(name)));
-                            node->codegen_node->data.type_ref = LLVMInt8Type();
+            {
+                node->codegen_node = allocate<CodeGenNode>(1);
+                TypeNode *type_node = &node->codegen_node->data.type_node;
+                switch (node->data.type.type) {
+                    case AstNodeTypeTypePrimitive:
+                        {
+                            Buf *name = &node->data.type.primitive_name;
+                            if (buf_eql_str(name, "u8")) {
+                                type_node->type_ref = LLVMInt8Type();
+                            } else if (buf_eql_str(name, "i32")) {
+                                type_node->type_ref = LLVMInt32Type();
+                            } else if (buf_eql_str(name, "void")) {
+                                type_node->type_ref = LLVMVoidType();
+                            } else if (buf_eql_str(name, "unreachable")) {
+                                type_node->type_ref = LLVMVoidType();
+                                type_node->is_unreachable = true;
+                            } else {
+                                add_node_error(g, node,
+                                        buf_sprintf("invalid type name: '%s'", buf_ptr(name)));
+                                type_node->type_ref = LLVMVoidType();
+                            }
+                            break;
                         }
-                        break;
-                    }
-                case AstNodeTypeTypePointer:
-                    {
-                        analyze_node(g, node->data.type.child_type);
-                        node->codegen_node->data.type_ref = LLVMPointerType(
-                                node->data.type.child_type->codegen_node->data.type_ref, 0);
-                        break;
-                    }
+                    case AstNodeTypeTypePointer:
+                        {
+                            analyze_node(g, node->data.type.child_type);
+                            TypeNode *child_type_node = &node->data.type.child_type->codegen_node->data.type_node;
+                            if (child_type_node->is_unreachable) {
+                                add_node_error(g, node,
+                                        buf_create_from_str("pointer to unreachable not allowed"));
+                            }
+                            type_node->type_ref = LLVMPointerType(child_type_node->type_ref, 0);
+                            break;
+                        }
+                }
+                break;
             }
-            break;
         case NodeTypeBlock:
             for (int i = 0; i < node->data.block.statements.length; i += 1) {
                 AstNode *child = node->data.block.statements.at(i);
@@ -183,6 +210,8 @@ static void analyze_node(CodeGen *g, AstNode *node) {
                 case AstNodeExpressionTypeFnCall:
                     analyze_node(g, node->data.expression.data.fn_call);
                     break;
+                case AstNodeExpressionTypeUnreachable:
+                    break;
             }
             break;
         case NodeTypeFnCall:
@@ -235,7 +264,11 @@ static LLVMValueRef gen_fn_call(CodeGen *g, AstNode *fn_call_node) {
     LLVMValueRef result = LLVMBuildCall(g->builder, fn_table_entry->fn_value,
             param_values, actual_param_count, "");
 
-    return result;
+    if (type_is_unreachable(fn_table_entry->proto_node->data.fn_proto.return_type)) {
+        return LLVMBuildUnreachable(g->builder);
+    } else {
+        return result;
+    }
 }
 
 static LLVMValueRef find_or_create_string(CodeGen *g, Buf *str) {
@@ -280,6 +313,8 @@ static LLVMValueRef gen_expr(CodeGen *g, AstNode *expr_node) {
             }
         case AstNodeExpressionTypeFnCall:
             return gen_fn_call(g, expr_node->data.expression.data.fn_call);
+        case AstNodeExpressionTypeUnreachable:
+            return LLVMBuildUnreachable(g->builder);
     }
     zig_unreachable();
 }
@@ -333,6 +368,10 @@ void code_gen(CodeGen *g) {
         LLVMTypeRef function_type = LLVMFunctionType(ret_type, param_types, fn_proto->params.length, 0);
         LLVMValueRef fn = LLVMAddFunction(g->mod, buf_ptr(&fn_proto->name), function_type);
 
+        if (type_is_unreachable(fn_proto->return_type)) {
+            LLVMAddFunctionAttr(fn, LLVMNoReturnAttribute);
+        }
+
         LLVMBasicBlockRef entry_block = LLVMAppendBasicBlock(fn, "entry");
         LLVMPositionBuilderAtEnd(g->builder, entry_block);
 
src/main.cpp
@@ -102,7 +102,8 @@ static int build(const char *arg0, const char *in_file, const char *out_file, Zi
     } else {
         for (int i = 0; i < errors->length; i += 1) {
             ErrorMsg *err = &errors->at(i);
-            fprintf(stderr, "Error: Line %d, column %d: %s\n", err->line_start, err->column_start,
+            fprintf(stderr, "Error: Line %d, column %d: %s\n",
+                    err->line_start + 1, err->column_start + 1,
                     buf_ptr(err->msg));
         }
         return 1;
@@ -115,6 +116,7 @@ static int build(const char *arg0, const char *in_file, const char *out_file, Zi
     fprintf(stderr, "\nLink:\n");
     fprintf(stderr, "------------------\n");
     code_gen_link(codegen, false, out_file);
+    fprintf(stderr, "OK\n");
 
     return 0;
 }
src/parser.cpp
@@ -236,7 +236,7 @@ static void ast_expect_token(ParseContext *pc, Token *token, TokenId token_id) {
 }
 
 /*
-Type : token(Symbol) | PointerType;
+Type : token(Symbol) | PointerType | token(Unreachable);
 PointerType : token(Star) token(Const) Type  | token(Star) token(Mut) Type;
 */
 static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token_index) {
@@ -245,7 +245,10 @@ static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token
 
     AstNode *node = ast_create_node(NodeTypeType, token);
 
-    if (token->id == TokenIdSymbol) {
+    if (token->id == TokenIdKeywordUnreachable) {
+        node->data.type.type = AstNodeTypeTypePrimitive;
+        buf_init_from_str(&node->data.type.primitive_name, "unreachable");
+    } else if (token->id == TokenIdSymbol) {
         node->data.type.type = AstNodeTypeTypePrimitive;
         ast_buf_from_token(pc, token, &node->data.type.primitive_name);
     } else if (token->id == TokenIdStar) {
@@ -373,10 +376,16 @@ static AstNode *ast_parse_fn_call(ParseContext *pc, int token_index, int *new_to
     return node;
 }
 
+/*
+Expression : token(Number) | token(String) | token(Unreachable) | FnCall
+*/
 static AstNode *ast_parse_expression(ParseContext *pc, int token_index, int *new_token_index) {
     Token *token = &pc->tokens->at(token_index);
     AstNode *node = ast_create_node(NodeTypeExpression, token);
-    if (token->id == TokenIdSymbol) {
+    if (token->id == TokenIdKeywordUnreachable) {
+        node->data.expression.type = AstNodeExpressionTypeUnreachable;
+        token_index += 1;
+    } else if (token->id == TokenIdSymbol) {
         node->data.expression.type = AstNodeExpressionTypeFnCall;
         node->data.expression.data.fn_call = ast_parse_fn_call(pc, token_index, &token_index);
     } else if (token->id == TokenIdNumberLiteral) {
@@ -402,7 +411,7 @@ ExpressionStatement : Expression token(Semicolon) ;
 
 ReturnStatement : token(Return) Expression token(Semicolon) ;
 
-Expression : token(Number)  | token(String)  | FnCall ;
+Expression : token(Number) | token(String) | token(Unreachable) | FnCall ;
 
 FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ;
 */
@@ -420,6 +429,7 @@ static AstNode *ast_parse_statement(ParseContext *pc, int token_index, int *new_
         ast_expect_token(pc, semicolon, TokenIdSemicolon);
     } else if (token->id == TokenIdSymbol ||
                token->id == TokenIdStringLiteral ||
+               token->id == TokenIdKeywordUnreachable ||
                token->id == TokenIdNumberLiteral)
     {
         node->data.statement.type = AstNodeStatementTypeExpression;
src/parser.hpp
@@ -94,6 +94,7 @@ enum AstNodeExpressionType {
     AstNodeExpressionTypeNumber,
     AstNodeExpressionTypeString,
     AstNodeExpressionTypeFnCall,
+    AstNodeExpressionTypeUnreachable,
 };
 
 struct AstNodeExpression {
src/tokenizer.cpp
@@ -152,6 +152,8 @@ static void end_token(Tokenize *t) {
         t->cur_tok->id = TokenIdKeywordConst;
     } else if (mem_eql_str(token_mem, token_len, "extern")) {
         t->cur_tok->id = TokenIdKeywordExtern;
+    } else if (mem_eql_str(token_mem, token_len, "unreachable")) {
+        t->cur_tok->id = TokenIdKeywordUnreachable;
     }
 
     t->cur_tok = nullptr;
@@ -311,6 +313,7 @@ static const char * token_name(Token *token) {
         case TokenIdKeywordMut: return "Mut";
         case TokenIdKeywordReturn: return "Return";
         case TokenIdKeywordExtern: return "Extern";
+        case TokenIdKeywordUnreachable: return "Unreachable";
         case TokenIdLParen: return "LParen";
         case TokenIdRParen: return "RParen";
         case TokenIdComma: return "Comma";
src/tokenizer.hpp
@@ -18,6 +18,7 @@ enum TokenId {
     TokenIdKeywordMut,
     TokenIdKeywordConst,
     TokenIdKeywordExtern,
+    TokenIdKeywordUnreachable,
     TokenIdLParen,
     TokenIdRParen,
     TokenIdComma,
test/hello.zig
@@ -1,9 +1,9 @@
 extern {
     fn puts(s: *mut u8) -> i32;
-    fn exit(code: i32);
+    fn exit(code: i32) -> unreachable;
 }
 
-fn _start() {
+fn _start() -> unreachable {
     puts("Hello, world!");
     exit(0);
 }
README.md
@@ -40,8 +40,6 @@ readable, safe, optimal, and concise code to solve any computing problem.
 
 ## Roadmap
 
- * Hello, world.
-   - Produce .o file.
  * Produce executable file instead of .o file.
  * Add debugging symbols.
  * Debug/Release mode.
@@ -87,7 +85,7 @@ ParamDeclList : token(LParen) list(ParamDecl, token(Comma)) token(RParen)
 
 ParamDecl : token(Symbol) token(Colon) Type
 
-Type : token(Symbol) | PointerType
+Type : token(Symbol) | PointerType | token(Unreachable)
 
 PointerType : token(Star) token(Const) Type  | token(Star) token(Mut) Type
 
@@ -99,7 +97,7 @@ ExpressionStatement : Expression token(Semicolon)
 
 ReturnStatement : token(Return) Expression token(Semicolon)
 
-Expression : token(Number) | token(String) | FnCall
+Expression : token(Number) | token(String) | token(Unreachable) | FnCall
 
 FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen)
 ```