Commit ca836191e1

Andrew Kelley <superjoe30@gmail.com>
2015-11-25 03:07:33
debug information for functions
1 parent baf5167
src/buffer.hpp
@@ -134,23 +134,6 @@ static inline void buf_splice_buf(Buf *buf, int start, int end, Buf *other) {
     memcpy(buf_ptr(buf) + start, buf_ptr(other), buf_len(other));
 }
 
-// TODO this method needs work
-static inline Buf *buf_dirname(Buf *buf) {
-    if (buf_len(buf) <= 2)
-        zig_panic("TODO buf_dirname small");
-    int last_index = buf_len(buf) - 1;
-    if (buf_ptr(buf)[buf_len(buf) - 1] == '/') {
-        last_index = buf_len(buf) - 2;
-    }
-    for (int i = last_index; i >= 0; i -= 1) {
-        uint8_t c = buf_ptr(buf)[i];
-        if (c == '/') {
-            return buf_slice(buf, 0, i);
-        }
-    }
-    return buf_create_from_mem("", 0);
-}
-
 static inline uint32_t buf_hash(Buf *buf) {
     // FNV 32-bit hash
     uint32_t h = 2166136261;
src/codegen.cpp
@@ -9,27 +9,63 @@
 #include "hash_map.hpp"
 #include "zig_llvm.hpp"
 #include "os.hpp"
+#include "config.h"
 
 #include <stdio.h>
 
+#include <llvm/IR/DIBuilder.h>
+#include <llvm/IR/DiagnosticInfo.h>
+#include <llvm/IR/DiagnosticPrinter.h>
+
 struct FnTableEntry {
     LLVMValueRef fn_value;
     AstNode *proto_node;
 };
 
+enum TypeId { // kind tag stored in TypeTableEntry::id
+    TypeIdUserDefined, // not assigned anywhere in the visible code; presumably reserved for user-declared types — TODO confirm
+    TypeIdPointer, // pointer entries created in resolve_type_and_recurse
+    TypeIdU8, // builtin "u8" (LLVM 8-bit integer)
+    TypeIdI32, // builtin "i32" (LLVM 32-bit integer)
+    TypeIdVoid, // builtin "void"; its entry also serves as the invalid-type fallback
+    TypeIdUnreachable, // builtin "unreachable"; uses the LLVM void type
+};
+
+struct TypeTableEntry { // one resolved type: its LLVM type, its debug-info type and its name
+    TypeId id; // which kind of type this entry describes
+    LLVMTypeRef type_ref; // LLVM type used when generating code for values of this type
+    llvm::DIType *di_type; // debug-info type produced through CodeGen::dbuilder
+
+    TypeTableEntry *pointer_child; // presumably the pointee entry when id == TypeIdPointer — TODO confirm
+    bool pointer_is_const; // presumably the constness of a pointer entry — TODO confirm
+    int user_defined_id; // not used in the visible code; NOTE(review): meaning unclear from here
+    Buf name; // type name; used as the type_table key and as the debug-info type name
+    TypeTableEntry *pointer_const_parent; // cached "*const" pointer entry whose child is this entry, or null
+    TypeTableEntry *pointer_mut_parent; // cached "*mut" pointer entry whose child is this entry, or null
+};
+
 struct CodeGen {
     LLVMModuleRef mod;
     AstNode *root;
     HashMap<Buf *, AstNode *, buf_hash, buf_eql_buf> fn_defs;
     ZigList<ErrorMsg> errors;
     LLVMBuilderRef builder;
+    llvm::DIBuilder *dbuilder; // debug-info builder for mod; created in semantic_analyze, finalized in code_gen
+    llvm::DICompileUnit *compile_unit; // compile unit created at the start of code_gen
     HashMap<Buf *, FnTableEntry *, buf_hash, buf_eql_buf> fn_table;
     HashMap<Buf *, LLVMValueRef, buf_hash, buf_eql_buf> str_table;
+    HashMap<Buf *, TypeTableEntry *, buf_hash, buf_eql_buf> type_table; // type name -> entry; seeded by add_types
+    TypeTableEntry *invalid_type_entry; // entry substituted for unknown type names (set to the "void" entry)
+    LLVMTargetDataRef target_data_ref; // obtained from target_machine via LLVMGetTargetMachineData
+    unsigned pointer_size_bytes; // LLVMPointerSize(target_data_ref)
+    bool is_static; // true selects LLVMRelocStatic, false selects LLVMRelocPIC
+    LLVMTargetMachineRef target_machine; // created in semantic_analyze; used by code_gen_link to emit the object file
+    Buf in_file; // base name of the input path, filled by os_path_split
+    Buf in_dir; // directory part of the input path, filled by os_path_split
 };
 
 struct TypeNode {
-    LLVMTypeRef type_ref;
-    bool is_unreachable;
+    TypeTableEntry *entry; // type-table entry this type node resolved to; set by resolve_type_and_recurse
 };
 
 struct CodeGenNode {
@@ -38,12 +74,16 @@ struct CodeGenNode {
     } data;
 };
 
-CodeGen *create_codegen(AstNode *root) {
+CodeGen *create_codegen(AstNode *root, bool is_static, Buf *in_full_path) { // allocates a CodeGen for root and records the link mode and input path
     CodeGen *g = allocate<CodeGen>(1);
     g->root = root;
     g->fn_defs.init(32);
     g->fn_table.init(32);
     g->str_table.init(32);
+    g->type_table.init(32); // entries are added later, by add_types and pointer-type resolution
+    g->is_static = is_static; // read in semantic_analyze to choose the relocation mode
+
+    os_path_split(in_full_path, &g->in_dir, &g->in_file); // in_dir / in_file feed createCompileUnit in code_gen
     return g;
 }
 
@@ -60,9 +100,17 @@ static void add_node_error(CodeGen *g, AstNode *node, Buf *msg) {
 static LLVMTypeRef to_llvm_type(AstNode *type_node) {
     assert(type_node->type == NodeTypeType);
     assert(type_node->codegen_node);
-    assert(type_node->codegen_node->data.type_node.type_ref);
+    assert(type_node->codegen_node->data.type_node.entry); // the node must already have been resolved to a table entry
+
+    return type_node->codegen_node->data.type_node.entry->type_ref; // LLVM type of the resolved entry
+}
+
+static llvm::DIType *to_llvm_debug_type(AstNode *type_node) { // debug-info counterpart of to_llvm_type
+    assert(type_node->type == NodeTypeType); // only type nodes are accepted
+    assert(type_node->codegen_node);
+    assert(type_node->codegen_node->data.type_node.entry); // the node must already have been resolved to a table entry
 
-    return type_node->codegen_node->data.type_node.type_ref;
+    return type_node->codegen_node->data.type_node.entry->di_type; // debug-info type of the resolved entry
 }
 
 
@@ -72,6 +120,56 @@ static bool type_is_unreachable(AstNode *type_node) {
             buf_eql_str(&type_node->data.type.primitive_name, "unreachable");
 }
 
+static void analyze_node(CodeGen *g, AstNode *node);
+
+// Resolves a NodeTypeType AST node to a TypeTableEntry and stores the result in a
+// freshly allocated codegen_node. The node must not have been resolved before.
+//
+// Primitive names are looked up in g->type_table; an unknown name reports an error
+// and falls back to g->invalid_type_entry. Pointer types analyze their child type
+// first and then reuse, or create and cache, the "*const" / "*mut" pointer entry
+// hanging off the child's entry, so each child has at most one entry per constness.
+static void resolve_type_and_recurse(CodeGen *g, AstNode *node) {
+    assert(!node->codegen_node);
+    node->codegen_node = allocate<CodeGenNode>(1);
+    TypeNode *type_node = &node->codegen_node->data.type_node;
+    switch (node->data.type.type) {
+        case AstNodeTypeTypePrimitive:
+            {
+                Buf *name = &node->data.type.primitive_name;
+                auto table_entry = g->type_table.maybe_get(name);
+                if (table_entry) {
+                    type_node->entry = table_entry->value;
+                } else {
+                    add_node_error(g, node,
+                            buf_sprintf("invalid type name: '%s'", buf_ptr(name)));
+                    type_node->entry = g->invalid_type_entry;
+                }
+                break;
+            }
+        case AstNodeTypeTypePointer:
+            {
+                analyze_node(g, node->data.type.child_type);
+                TypeNode *child_type_node = &node->data.type.child_type->codegen_node->data.type_node;
+                if (child_type_node->entry->id == TypeIdUnreachable) {
+                    // Report the error but keep going so the node still gets an entry.
+                    add_node_error(g, node,
+                            buf_create_from_str("pointer to unreachable not allowed"));
+                }
+                // Slot on the child entry that caches the pointer type of this constness.
+                TypeTableEntry **parent_pointer = node->data.type.is_const ?
+                    &child_type_node->entry->pointer_const_parent :
+                    &child_type_node->entry->pointer_mut_parent;
+                const char *const_or_mut_str = node->data.type.is_const ? "const" : "mut";
+                if (*parent_pointer) {
+                    type_node->entry = *parent_pointer;
+                } else {
+                    TypeTableEntry *entry = allocate<TypeTableEntry>(1);
+                    entry->id = TypeIdPointer;
+                    // Record what the pointer refers to; these fields were previously left zeroed.
+                    entry->pointer_child = child_type_node->entry;
+                    entry->pointer_is_const = node->data.type.is_const;
+                    entry->type_ref = LLVMPointerType(child_type_node->entry->type_ref, 0);
+                    buf_appendf(&entry->name, "*%s %s", const_or_mut_str, buf_ptr(&child_type_node->entry->name));
+                    // Size and alignment are passed in bits.
+                    entry->di_type = g->dbuilder->createPointerType(child_type_node->entry->di_type,
+                            g->pointer_size_bytes * 8, g->pointer_size_bytes * 8, buf_ptr(&entry->name));
+                    g->type_table.put(&entry->name, entry);
+                    type_node->entry = entry;
+                    *parent_pointer = entry;
+                }
+                break;
+            }
+    }
+}
+
 static void analyze_node(CodeGen *g, AstNode *node) {
     switch (node->type) {
         case NodeTypeRoot:
@@ -148,42 +246,10 @@ static void analyze_node(CodeGen *g, AstNode *node) {
         case NodeTypeParamDecl:
             analyze_node(g, node->data.param_decl.type);
             break;
+
         case NodeTypeType:
             {
-                node->codegen_node = allocate<CodeGenNode>(1);
-                TypeNode *type_node = &node->codegen_node->data.type_node;
-                switch (node->data.type.type) {
-                    case AstNodeTypeTypePrimitive:
-                        {
-                            Buf *name = &node->data.type.primitive_name;
-                            if (buf_eql_str(name, "u8")) {
-                                type_node->type_ref = LLVMInt8Type();
-                            } else if (buf_eql_str(name, "i32")) {
-                                type_node->type_ref = LLVMInt32Type();
-                            } else if (buf_eql_str(name, "void")) {
-                                type_node->type_ref = LLVMVoidType();
-                            } else if (buf_eql_str(name, "unreachable")) {
-                                type_node->type_ref = LLVMVoidType();
-                                type_node->is_unreachable = true;
-                            } else {
-                                add_node_error(g, node,
-                                        buf_sprintf("invalid type name: '%s'", buf_ptr(name)));
-                                type_node->type_ref = LLVMVoidType();
-                            }
-                            break;
-                        }
-                    case AstNodeTypeTypePointer:
-                        {
-                            analyze_node(g, node->data.type.child_type);
-                            TypeNode *child_type_node = &node->data.type.child_type->codegen_node->data.type_node;
-                            if (child_type_node->is_unreachable) {
-                                add_node_error(g, node,
-                                        buf_create_from_str("pointer to unreachable not allowed"));
-                            }
-                            type_node->type_ref = LLVMPointerType(child_type_node->type_ref, 0);
-                            break;
-                        }
-                }
+                resolve_type_and_recurse(g, node);
                 break;
             }
         case NodeTypeBlock:
@@ -224,10 +290,85 @@ static void analyze_node(CodeGen *g, AstNode *node) {
     }
 }
 
+// Allocates a type-table entry and fills in the fields that do not need the
+// debug-info builder: kind tag, LLVM type and name.
+static TypeTableEntry *new_builtin_type_entry(TypeId id, LLVMTypeRef type_ref, const char *name) {
+    TypeTableEntry *entry = allocate<TypeTableEntry>(1);
+    entry->id = id;
+    entry->type_ref = type_ref;
+    buf_init_from_str(&entry->name, name);
+    return entry;
+}
+
+// Seeds g->type_table with the builtin types "u8", "i32", "void" and "unreachable",
+// and makes the "void" entry the fallback for invalid type names.
+static void add_types(CodeGen *g) {
+    TypeTableEntry *u8_entry = new_builtin_type_entry(TypeIdU8, LLVMInt8Type(), "u8");
+    u8_entry->di_type = g->dbuilder->createBasicType(buf_ptr(&u8_entry->name), 8, 8,
+            llvm::dwarf::DW_ATE_unsigned);
+    g->type_table.put(&u8_entry->name, u8_entry);
+
+    TypeTableEntry *i32_entry = new_builtin_type_entry(TypeIdI32, LLVMInt32Type(), "i32");
+    i32_entry->di_type = g->dbuilder->createBasicType(buf_ptr(&i32_entry->name), 32, 32,
+            llvm::dwarf::DW_ATE_signed);
+    g->type_table.put(&i32_entry->name, i32_entry);
+
+    TypeTableEntry *void_entry = new_builtin_type_entry(TypeIdVoid, LLVMVoidType(), "void");
+    void_entry->di_type = g->dbuilder->createBasicType(buf_ptr(&void_entry->name), 0, 0,
+            llvm::dwarf::DW_ATE_unsigned);
+    g->type_table.put(&void_entry->name, void_entry);
+
+    // invalid types are void
+    g->invalid_type_entry = void_entry;
+
+    // "unreachable" shares the void entry's debug-info type.
+    TypeTableEntry *unreachable_entry = new_builtin_type_entry(TypeIdUnreachable, LLVMVoidType(),
+            "unreachable");
+    unreachable_entry->di_type = g->invalid_type_entry->di_type;
+    g->type_table.put(&unreachable_entry->name, unreachable_entry);
+}
+
 
 void semantic_analyze(CodeGen *g) {
+    LLVMInitializeAllTargets(); // target setup moved here from code_gen_link: type resolution below needs pointer_size_bytes and dbuilder
+    LLVMInitializeAllTargetMCs();
+    LLVMInitializeAllAsmPrinters();
+    LLVMInitializeAllAsmParsers();
+    LLVMInitializeNativeTarget();
+
+    char *native_triple = LLVMGetDefaultTargetTriple();
+
+    LLVMTargetRef target_ref;
+    char *err_msg = nullptr;
+    if (LLVMGetTargetFromTriple(native_triple, &target_ref, &err_msg)) { // a nonzero result is treated as failure
+        zig_panic("unable to get target from triple: %s", err_msg);
+    }
+
+    char *native_cpu = LLVMZigGetHostCPUName();
+    char *native_features = LLVMZigGetNativeFeatures();
+
+    LLVMCodeGenOptLevel opt_level = LLVMCodeGenLevelNone;
+
+    LLVMRelocMode reloc_mode = g->is_static ? LLVMRelocStatic : LLVMRelocPIC; // is_static was recorded by create_codegen
+
+    g->target_machine = LLVMCreateTargetMachine(target_ref, native_triple,
+            native_cpu, native_features, opt_level, reloc_mode, LLVMCodeModelDefault); // kept on g so code_gen_link can emit with it
+
+    g->target_data_ref = LLVMGetTargetMachineData(g->target_machine);
+
+
     g->mod = LLVMModuleCreateWithName("ZigModule");
 
+    g->pointer_size_bytes = LLVMPointerSize(g->target_data_ref); // used when building pointer debug-info types
+
+    g->builder = LLVMCreateBuilder(); // previously created at the start of code_gen
+    g->dbuilder = new llvm::DIBuilder(*llvm::unwrap(g->mod), true); // debug-info builder bound to g->mod
+
+
+    add_types(g); // builtin types must be registered before analysis looks names up in type_table
+
     // Pass 1.
     analyze_node(g, g->root);
 }
@@ -344,8 +485,29 @@ static void gen_block(CodeGen *g, AstNode *block_node) {
     }
 }
 
+// Builds the debug-info subroutine type for fn_proto. The type array holds the
+// return type first, followed by each parameter's type in declaration order.
+// Every type node involved must already have been resolved by analysis.
+static llvm::DISubroutineType *create_di_function_type(CodeGen *g, AstNodeFnProto *fn_proto, llvm::DIFile *unit) {
+    llvm::SmallVector<llvm::Metadata *, 8> types;
+
+    llvm::DIType *return_type = to_llvm_debug_type(fn_proto->return_type);
+    types.push_back(return_type);
+
+    for (int i = 0; i < fn_proto->params.length; i += 1) {
+        AstNode *param_node = fn_proto->params.at(i);
+        // Parameters are NodeTypeParamDecl nodes; to_llvm_debug_type only accepts
+        // NodeTypeType nodes, so hand it the declaration's type node rather than
+        // the declaration itself.
+        assert(param_node->type == NodeTypeParamDecl);
+        llvm::DIType *param_type = to_llvm_debug_type(param_node->data.param_decl.type);
+        types.push_back(param_type);
+    }
+
+    return g->dbuilder->createSubroutineType(unit, g->dbuilder->getOrCreateTypeArray(types));
+}
+
 void code_gen(CodeGen *g) {
-    g->builder = LLVMCreateBuilder();
+    Buf *producer = buf_sprintf("zig %s", ZIG_VERSION_STRING);
+    bool is_optimized = false;
+    const char *flags = "";
+    unsigned runtime_version = 0;
+    g->compile_unit = g->dbuilder->createCompileUnit(llvm::dwarf::DW_LANG_C99,
+            buf_ptr(&g->in_file), buf_ptr(&g->in_dir),
+            buf_ptr(producer), is_optimized, flags, runtime_version);
 
     auto it = g->fn_defs.entry_iterator();
     for (;;) {
@@ -369,9 +531,29 @@ void code_gen(CodeGen *g) {
         LLVMTypeRef function_type = LLVMFunctionType(ret_type, param_types, fn_proto->params.length, 0);
         LLVMValueRef fn = LLVMAddFunction(g->mod, buf_ptr(&fn_proto->name), function_type);
 
+        bool internal_linkage = false;
+        LLVMSetLinkage(fn, internal_linkage ? LLVMPrivateLinkage : LLVMExternalLinkage);
+
         if (type_is_unreachable(fn_proto->return_type)) {
             LLVMAddFunctionAttr(fn, LLVMNoReturnAttribute);
         }
+        LLVMAddFunctionAttr(fn, LLVMNoUnwindAttribute);
+
+        // Add debug info.
+        llvm::DIFile *unit = g->dbuilder->createFile(g->compile_unit->getFilename(),
+                g->compile_unit->getDirectory());
+        llvm::DIScope *fn_scope = unit;
+        unsigned line_number = fn_def_node->line + 1;
+        unsigned scope_line = line_number;
+        bool is_definition = true;
+        unsigned flags = 0;
+        llvm::Function *unwrapped_function = reinterpret_cast<llvm::Function*>(llvm::unwrap(fn));
+        g->dbuilder->createFunction(
+            fn_scope, buf_ptr(&fn_proto->name), "", unit, line_number,
+            create_di_function_type(g, fn_proto, unit), internal_linkage, 
+            is_definition, scope_line, flags, is_optimized, unwrapped_function);
+
+
 
         LLVMBasicBlockRef entry_block = LLVMAppendBasicBlock(fn, "entry");
         LLVMPositionBuilderAtEnd(g->builder, entry_block);
@@ -379,6 +561,8 @@ void code_gen(CodeGen *g) {
         gen_block(g, fn_def->body);
     }
 
+    g->dbuilder->finalize();
+
     LLVMDumpModule(g->mod);
 
     char *error = nullptr;
@@ -390,14 +574,7 @@ ZigList<ErrorMsg> *codegen_error_messages(CodeGen *g) {
 }
 
 
-void code_gen_link(CodeGen *g, bool is_static, const char *out_file) {
-    LLVMInitializeAllTargets();
-    LLVMInitializeAllTargetMCs();
-    LLVMInitializeAllAsmPrinters();
-    LLVMInitializeAllAsmParsers();
-    LLVMInitializeNativeTarget();
-
-
+void code_gen_link(CodeGen *g, const char *out_file) {
     LLVMPassRegistryRef registry = LLVMGetGlobalPassRegistry();
     LLVMInitializeCore(registry);
     LLVMInitializeCodeGen(registry);
@@ -405,29 +582,12 @@ void code_gen_link(CodeGen *g, bool is_static, const char *out_file) {
     LLVMZigInitializeLowerIntrinsicsPass(registry);
     LLVMZigInitializeUnreachableBlockElimPass(registry);
 
-    char *native_triple = LLVMGetDefaultTargetTriple();
-
-    LLVMTargetRef target_ref;
-    char *err_msg = nullptr;
-    if (LLVMGetTargetFromTriple(native_triple, &target_ref, &err_msg)) {
-        zig_panic("unable to get target from triple: %s", err_msg);
-    }
-
-    char *native_cpu = LLVMZigGetHostCPUName();
-    char *native_features = LLVMZigGetNativeFeatures();
-
-    LLVMCodeGenOptLevel opt_level = LLVMCodeGenLevelNone;
-
-    LLVMRelocMode reloc_mode = is_static ? LLVMRelocStatic : LLVMRelocPIC;
-
-    LLVMTargetMachineRef target_machine = LLVMCreateTargetMachine(target_ref, native_triple,
-            native_cpu, native_features, opt_level, reloc_mode, LLVMCodeModelDefault);
-
     Buf out_file_o = BUF_INIT;
     buf_init_from_str(&out_file_o, out_file);
     buf_append_str(&out_file_o, ".o");
 
-    if (LLVMTargetMachineEmitToFile(target_machine, g->mod, buf_ptr(&out_file_o), LLVMObjectFile, &err_msg)) {
+    char *err_msg = nullptr;
+    if (LLVMTargetMachineEmitToFile(g->target_machine, g->mod, buf_ptr(&out_file_o), LLVMObjectFile, &err_msg)) {
         zig_panic("unable to write object file: %s", err_msg);
     }
 
src/codegen.hpp
@@ -21,13 +21,13 @@ struct ErrorMsg {
 };
 
 
-CodeGen *create_codegen(AstNode *root);
+// Creates a CodeGen for the AST rooted at `root`. `is_static` selects the relocation
+// mode used when the target machine is created. `in_full_path` is the full input
+// path; it is split into a directory and a base name for the debug-info compile unit.
+CodeGen *create_codegen(AstNode *root, bool is_static, Buf *in_full_path);
 
 void semantic_analyze(CodeGen *g);
 
 void code_gen(CodeGen *g);
 
-void code_gen_link(CodeGen *g, bool is_static, const char *out_file);
+void code_gen_link(CodeGen *g, const char *out_file);
 
 ZigList<ErrorMsg> *codegen_error_messages(CodeGen *g);
 
src/main.cpp
@@ -62,18 +62,15 @@ static int build(const char *arg0, const char *in_file, const char *out_file, Zi
         return usage(arg0);
 
     FILE *in_f;
-    Buf *cur_dir_path;
     if (strcmp(in_file, "-") == 0) {
         in_f = stdin;
         char *result = getcwd(cur_dir, sizeof(cur_dir));
         if (!result)
             zig_panic("unable to get current working directory: %s", strerror(errno));
-        cur_dir_path = buf_create_from_str(result);
     } else {
         in_f = fopen(in_file, "rb");
         if (!in_f)
             zig_panic("unable to open %s for reading: %s\n", in_file, strerror(errno));
-        cur_dir_path = buf_dirname(buf_create_from_str(in_file));
     }
 
     fprintf(stderr, "Original source:\n");
@@ -83,7 +80,7 @@ static int build(const char *arg0, const char *in_file, const char *out_file, Zi
 
     fprintf(stderr, "\nTokens:\n");
     fprintf(stderr, "---------\n");
-    ZigList<Token> *tokens = tokenize(in_data, cur_dir_path);
+    ZigList<Token> *tokens = tokenize(in_data);
     print_tokens(in_data, tokens);
 
     fprintf(stderr, "\nAST:\n");
@@ -94,7 +91,7 @@ static int build(const char *arg0, const char *in_file, const char *out_file, Zi
 
     fprintf(stderr, "\nSemantic Analysis:\n");
     fprintf(stderr, "--------------------\n");
-    CodeGen *codegen = create_codegen(root);
+    CodeGen *codegen = create_codegen(root, false, buf_create_from_str(in_file));
     semantic_analyze(codegen);
     ZigList<ErrorMsg> *errors = codegen_error_messages(codegen);
     if (errors->length == 0) {
@@ -115,7 +112,7 @@ static int build(const char *arg0, const char *in_file, const char *out_file, Zi
 
     fprintf(stderr, "\nLink:\n");
     fprintf(stderr, "------------------\n");
-    code_gen_link(codegen, false, out_file);
+    code_gen_link(codegen, out_file);
     fprintf(stderr, "OK\n");
 
     return 0;
src/os.cpp
@@ -31,3 +31,23 @@ void os_spawn_process(const char *exe, ZigList<const char *> &args, bool detache
     execvp(exe, const_cast<char * const *>(argv));
     zig_panic("execvp failed: %s", strerror(errno));
 }
+
+// Splits full_path at its last '/' into a directory part and a base name.
+//
+// A single trailing '/' is skipped when searching for the separator, so "a/b/"
+// yields dirname "a" and basename "b/". A path without a separator yields
+// dirname "." and the whole path as basename; this includes short paths such as
+// the "-" that main passes for stdin, which used to hit a panic. When the
+// separator found is the leading '/', dirname is "/" rather than the empty string.
+//
+// out_dirname and out_basename are initialized by this call.
+void os_path_split(Buf *full_path, Buf *out_dirname, Buf *out_basename) {
+    const int len = buf_len(full_path);
+    const char *path = buf_ptr(full_path);
+
+    int last_index = len - 1;
+    // Only skip a trailing slash when something precedes it, so "/" itself is still found.
+    if (len > 1 && path[len - 1] == '/') {
+        last_index = len - 2;
+    }
+    for (int i = last_index; i >= 0; i -= 1) {
+        if (path[i] == '/') {
+            // Keep the root directory as "/" instead of collapsing it to "".
+            int dir_len = (i == 0) ? 1 : i;
+            buf_init_from_mem(out_dirname, path, dir_len);
+            buf_init_from_mem(out_basename, path + i + 1, len - (i + 1));
+            return;
+        }
+    }
+    // No separator: the path is relative to the current directory.
+    buf_init_from_mem(out_dirname, ".", 1);
+    buf_init_from_buf(out_basename, full_path);
+}
+
src/os.hpp
@@ -13,4 +13,7 @@
 
 void os_spawn_process(const char *exe, ZigList<const char *> &args, bool detached);
 
+void os_path_split(Buf *full_path, Buf *out_dirname, Buf *out_basename); // splits full_path at its last '/' into directory and base name; "." is the directory when there is no '/'
+
+
 #endif
src/tokenizer.cpp
@@ -100,7 +100,6 @@ struct Tokenize {
     int line;
     int column;
     Token *cur_tok;
-    Buf *cur_dir_path;
 };
 
 __attribute__ ((format (printf, 2, 3)))
@@ -159,11 +158,10 @@ static void end_token(Tokenize *t) {
     t->cur_tok = nullptr;
 }
 
-ZigList<Token> *tokenize(Buf *buf, Buf *cur_dir_path) {
+ZigList<Token> *tokenize(Buf *buf) {
     Tokenize t = {0};
     t.tokens = allocate<ZigList<Token>>(1);
     t.buf = buf;
-    t.cur_dir_path = cur_dir_path;
     for (t.pos = 0; t.pos < buf_len(t.buf); t.pos += 1) {
         uint8_t c = buf_ptr(t.buf)[t.pos];
         switch (t.state) {
src/tokenizer.hpp
@@ -50,7 +50,7 @@ enum TokenizeState {
     TokenizeStateSawDash,
 };
 
-ZigList<Token> *tokenize(Buf *buf, Buf *cur_dir_path);
+ZigList<Token> *tokenize(Buf *buf);
 
 void print_tokens(Buf *buf, ZigList<Token> *tokens);
 
src/util.hpp
@@ -12,6 +12,7 @@
 #include <string.h>
 #include <assert.h>
 
+#include <new>
 
 #define BREAKPOINT __asm("int $0x03")