Commit ad3f98c615

Andrew Kelley <superjoe30@gmail.com>
2015-12-03 23:59:14
parseh command, parses a C .h file and produces extern decls
1 parent 174e58a
cmake/Findclang.cmake
@@ -0,0 +1,16 @@
+# Copyright (c) 2015 Andrew Kelley
+# This file is MIT licensed.
+# See http://opensource.org/licenses/MIT
+
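+# Defines the following variables:
+#   CLANG_FOUND        - set when both the header and the library were found
+#   CLANG_INCLUDE_DIR  - directory that contains clang-c/Index.h
+#   CLANG_LIBRARY      - path to the clang library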
+
+find_path(CLANG_INCLUDE_DIR NAMES clang-c/Index.h)
+
+find_library(CLANG_LIBRARY NAMES clang)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(CLANG DEFAULT_MSG CLANG_LIBRARY CLANG_INCLUDE_DIR)
+
+mark_as_advanced(CLANG_INCLUDE_DIR CLANG_LIBRARY)
src/buffer.hpp
@@ -59,6 +59,7 @@ static inline void buf_deinit(Buf *buf) {
 }
 
 static inline void buf_init_from_mem(Buf *buf, const char *ptr, int len) {
+    assert(len >= 0);
     buf->list.resize(len + 1);
     memcpy(buf_ptr(buf), ptr, len);
     buf->list.at(buf_len(buf)) = 0;
@@ -73,6 +74,7 @@ static inline void buf_init_from_buf(Buf *buf, Buf *other) {
 }
 
 static inline Buf *buf_create_from_mem(const char *ptr, int len) {
+    assert(len >= 0);
     Buf *buf = allocate<Buf>(1);
     buf_init_from_mem(buf, ptr, len);
     return buf;
@@ -82,6 +84,10 @@ static inline Buf *buf_create_from_str(const char *str) {
     return buf_create_from_mem(str, strlen(str));
 }
 
+// Returns a newly allocated Buf initialized from the bytes held by `buf`.
+static inline Buf *buf_create_from_buf(Buf *buf) {
+    Buf *copy = allocate<Buf>(1);
+    buf_init_from_mem(copy, buf_ptr(buf), buf_len(buf));
+    return copy;
+}
+
 static inline Buf *buf_slice(Buf *in_buf, int start, int end) {
     assert(in_buf->list.length);
     assert(start >= 0);
src/main.cpp
@@ -10,15 +10,17 @@
 #include "codegen.hpp"
 #include "os.hpp"
 #include "error.hpp"
+#include "parseh.hpp"
 
 #include <stdio.h>
 
 static int usage(const char *arg0) {
-    fprintf(stderr, "Usage: %s [command] [options] target\n"
+    fprintf(stderr, "Usage: %s [command] [options]\n"
         "Commands:\n"
         "  build                  create executable, object, or library from target\n"
         "  version                print version number and exit\n"
-        "Optional Options:\n"
+        "  parseh                 convert a c header file to zig extern declarations\n"
+        "Command: build target\n"
         "  --release              build with optimizations on and debug protection off\n"
         "  --static               output will be statically linked\n"
         "  --strip                exclude debug symbols\n"
@@ -27,11 +29,15 @@ static int usage(const char *arg0) {
         "  --output [file]        override destination path\n"
         "  --verbose              turn on compiler debug output\n"
         "  --color [auto|off|on]  enable or disable colored error messages\n"
+        "Command: parseh target\n"
+        "  -isystem [dir]         add additional search path for other .h files\n"
+        "  -dirafter [dir]        same as -isystem but do it last\n"
+        "  -B[prefix]             set the C compiler data path\n"
     , arg0);
     return EXIT_FAILURE;
 }
 
-static int version(void) {
+static int version(const char *arg0, int argc, char **argv) { // prints ZIG_VERSION_STRING; the parameters are unused and only match the signature of the other command handlers
     printf("%s\n", ZIG_VERSION_STRING);
     return EXIT_SUCCESS;
 }
@@ -48,62 +54,11 @@ struct Build {
     ErrColor color;
 };
 
-static int build(const char *arg0, Build *b) {
+static int build(const char *arg0, int argc, char **argv) {
     int err;
-
-    if (!b->in_file)
-        return usage(arg0);
-
-    Buf in_file_buf = BUF_INIT;
-    buf_init_from_str(&in_file_buf, b->in_file);
-
-    Buf root_source_dir = BUF_INIT;
-    Buf root_source_code = BUF_INIT;
-    Buf root_source_name = BUF_INIT;
-    if (buf_eql_str(&in_file_buf, "-")) {
-        os_get_cwd(&root_source_dir);
-        if ((err = os_fetch_file(stdin, &root_source_code))) {
-            fprintf(stderr, "unable to read stdin: %s\n", err_str(err));
-            return 1;
-        }
-        buf_init_from_str(&root_source_name, "");
-    } else {
-        os_path_split(&in_file_buf, &root_source_dir, &root_source_name);
-        if ((err = os_fetch_file_path(buf_create_from_str(b->in_file), &root_source_code))) {
-            fprintf(stderr, "unable to open '%s': %s\n", b->in_file, err_str(err));
-            return 1;
-        }
-    }
-
-    CodeGen *g = codegen_create(&root_source_dir);
-    codegen_set_build_type(g, b->release ? CodeGenBuildTypeRelease : CodeGenBuildTypeDebug);
-    codegen_set_strip(g, b->strip);
-    codegen_set_is_static(g, b->is_static);
-    if (b->out_type != OutTypeUnknown)
-        codegen_set_out_type(g, b->out_type);
-    if (b->out_name)
-        codegen_set_out_name(g, buf_create_from_str(b->out_name));
-    codegen_set_verbose(g, b->verbose);
-    codegen_set_errmsg_color(g, b->color);
-    codegen_add_root_code(g, &root_source_name, &root_source_code);
-    codegen_link(g, b->out_file);
-
-    return 0;
-}
-
-enum Cmd {
-    CmdNone,
-    CmdBuild,
-    CmdVersion,
-};
-
-int main(int argc, char **argv) {
-    char *arg0 = argv[0];
-
     Build b = {0};
-    Cmd cmd = CmdNone;
 
-    for (int i = 1; i < argc; i += 1) {
+    for (int i = 0; i < argc; i += 1) {
         char *arg = argv[i];
         if (arg[0] == '-' && arg[1] == '-') {
             if (strcmp(arg, "--release") == 0) {
@@ -148,40 +103,109 @@ int main(int argc, char **argv) {
                     return usage(arg0);
                 }
             }
-        } else if (cmd == CmdNone) {
-            if (strcmp(arg, "build") == 0) {
-                cmd = CmdBuild;
-            } else if (strcmp(arg, "version") == 0) {
-                cmd = CmdVersion;
+        } else if (!b.in_file) {
+            b.in_file = arg;
+        } else {
+            return usage(arg0);
+        }
+    }
+
+    if (!b.in_file)
+        return usage(arg0);
+
+    Buf in_file_buf = BUF_INIT;
+    buf_init_from_str(&in_file_buf, b.in_file);
+
+    Buf root_source_dir = BUF_INIT;
+    Buf root_source_code = BUF_INIT;
+    Buf root_source_name = BUF_INIT;
+    if (buf_eql_str(&in_file_buf, "-")) {
+        os_get_cwd(&root_source_dir);
+        if ((err = os_fetch_file(stdin, &root_source_code))) {
+            fprintf(stderr, "unable to read stdin: %s\n", err_str(err));
+            return 1;
+        }
+        buf_init_from_str(&root_source_name, "");
+    } else {
+        os_path_split(&in_file_buf, &root_source_dir, &root_source_name);
+        if ((err = os_fetch_file_path(buf_create_from_str(b.in_file), &root_source_code))) {
+            fprintf(stderr, "unable to open '%s': %s\n", b.in_file, err_str(err));
+            return 1;
+        }
+    }
+
+    CodeGen *g = codegen_create(&root_source_dir);
+    codegen_set_build_type(g, b.release ? CodeGenBuildTypeRelease : CodeGenBuildTypeDebug);
+    codegen_set_strip(g, b.strip);
+    codegen_set_is_static(g, b.is_static);
+    if (b.out_type != OutTypeUnknown)
+        codegen_set_out_type(g, b.out_type);
+    if (b.out_name)
+        codegen_set_out_name(g, buf_create_from_str(b.out_name));
+    codegen_set_verbose(g, b.verbose);
+    codegen_set_errmsg_color(g, b.color);
+    codegen_add_root_code(g, &root_source_name, &root_source_code);
+    codegen_link(g, b.out_file);
+
+    return 0;
+}
+
+// Handles the `parseh` command: collects the target header and the options
+// forwarded to clang, then writes the generated declarations to stdout.
+// argc/argv hold only the arguments that follow the command name.
+// Returns 0 on success, or the result of usage() on a bad command line.
+static int parseh(const char *arg0, int argc, char **argv) {
+    char *in_file = nullptr;
+    ZigList<const char *> clang_argv = {0};
+    for (int i = 0; i < argc; i += 1) {
+        char *arg = argv[i];
+        if (arg[0] == '-') {
+            if (arg[1] == 'I') {
+                clang_argv.append(arg);
+            } else if (strcmp(arg, "-isystem") == 0) {
+                if (i + 1 >= argc) {
+                    return usage(arg0);
+                }
+                clang_argv.append("-isystem");
+                clang_argv.append(argv[i + 1]);
+                // Consume the directory operand so the next iteration does not
+                // mistake it for the target file.
+                i += 1;
+            } else if (strcmp(arg, "-dirafter") == 0) {
+                // Listed in usage(); clang spells this option -idirafter.
+                if (i + 1 >= argc) {
+                    return usage(arg0);
+                }
+                clang_argv.append("-idirafter");
+                clang_argv.append(argv[i + 1]);
+                i += 1;
+            } else if (arg[1] == 'B') {
+                clang_argv.append(arg);
             } else {
-                fprintf(stderr, "Unrecognized command: %s\n", arg);
+                fprintf(stderr, "unrecognized argument: %s\n", arg);
                 return usage(arg0);
             }
+        } else if (!in_file) {
+            in_file = arg;
         } else {
-            switch (cmd) {
-                case CmdNone:
-                    zig_unreachable();
-                case CmdBuild:
-                    if (!b.in_file) {
-                        b.in_file = arg;
-                    } else {
-                        return usage(arg0);
-                    }
-                    break;
-                case CmdVersion:
-                    return usage(arg0);
-            }
+            return usage(arg0);
         }
     }
+    if (!in_file) {
+        fprintf(stderr, "missing target argument\n");
+        return usage(arg0);
+    }
+
+    parse_h_file(in_file, &clang_argv, stdout);
+    return 0;
+}
 
-    switch (cmd) {
-        case CmdNone:
+int main(int argc, char **argv) { // entry point: runs the handler named by the first argument
+    char *arg0 = argv[0];
+    int (*cmd)(const char *, int, char **) = nullptr; // handler selected from the command name
+    for (int i = 1; i < argc; i += 1) {
+        char *arg = argv[i];
+        if (arg[0] == '-' && arg[1] == '-') { // a "--" option in place of the command name is rejected
             return usage(arg0);
-        case CmdBuild:
-            return build(arg0, &b);
-        case CmdVersion:
-            return version();
+        } else {
+            if (strcmp(arg, "build") == 0) {
+                cmd = build;
+            } else if (strcmp(arg, "version") == 0) {
+                cmd = version;
+            } else if (strcmp(arg, "parseh") == 0) {
+                cmd = parseh;
+            } else {
+                fprintf(stderr, "Unrecognized command: %s\n", arg);
+                return usage(arg0);
+            }
+            return cmd(arg0, argc - i - 1, &argv[i + 1]); // the handler receives only the arguments after the command name
+        }
     }
 
-    zig_unreachable();
+    return usage(arg0); // reached only when no arguments were given
 }
src/parseh.cpp
@@ -0,0 +1,351 @@
+#include "parseh.hpp"
+
+#include <clang-c/Index.h>
+
+#include <string.h>
+
+struct Arg { // one parameter of a collected function
+    Buf name; // parameter name, filled in by fn_visitor's ParmDecl case
+    Buf *type; // Zig type text produced by to_zig_type
+};
+
+struct Fn { // one function declaration collected for output
+    Buf name; // function name taken from the cursor spelling
+    Buf *return_type; // Zig type text of the result, from to_zig_type
+    Arg *args; // array of arg_count parameters
+    int arg_count; // number of entries in args
+};
+
+struct ParseH { // state shared by parse_h_file and the cursor visitor
+    FILE *f; // stream the generated declarations are written to
+    ZigList<Fn *> fn_list; // functions completed so far, in the order they were visited
+    Fn *cur_fn; // function currently being filled in, or null when none is in progress
+    int arg_index; // index of the next parameter of cur_fn that will receive a name
+    int cur_indent; // number of spaces print_indent writes
+};
+
+static const int indent_size = 4; // amount added to cur_indent when the output enters the extern block
+
+// Reports whether `str` begins with `prefix`. An empty prefix always matches.
+static bool str_has_prefix(const char *str, const char *prefix) {
+    size_t prefix_len = strlen(prefix);
+    return strncmp(str, prefix, prefix_len) == 0;
+}
+
+// Returns the spelling of `type` with every leading "struct ", "enum " and
+// "const " removed, in whatever order and number they occur.
+// NOTE(review): the CXString obtained from clang_getTypeSpelling is never
+// disposed; presumably that is because the returned pointer points into it.
+// Confirm before adding a clang_disposeString call.
+static const char *prefixes_stripped(CXType type) {
+    static const char *prefixes[] = {
+        "struct ",
+        "enum ",
+        "const ",
+    };
+
+    const char *spelling = clang_getCString(clang_getTypeSpelling(type));
+
+    // Rescan from the first prefix after each removal until nothing matches.
+    bool stripped;
+    do {
+        stripped = false;
+        for (int i = 0; i < array_length(prefixes); i += 1) {
+            if (str_has_prefix(spelling, prefixes[i])) {
+                spelling += strlen(prefixes[i]);
+                stripped = true;
+                break;
+            }
+        }
+    } while (stripped);
+
+    return spelling;
+}
+
+// Translates a C type into the text of the Zig type used for it in the output.
+// The type is canonicalized with clang_getCanonicalType before its kind is
+// examined. Kinds that have no translation yet abort through zig_panic.
+static Buf *to_zig_type(CXType raw_type) {
+    CXType type = clang_getCanonicalType(raw_type);
+    const char *zig_name = nullptr;
+
+    switch (type.kind) {
+        case CXType_Invalid:
+            zig_unreachable();
+        case CXType_Unexposed:
+            zig_panic("clang C api insufficient");
+
+        // Kinds that map to a fixed Zig type name.
+        case CXType_Void:
+            zig_name = "void";
+            break;
+        case CXType_Bool:
+            zig_name = "bool";
+            break;
+        case CXType_SChar:
+            zig_name = "i8";
+            break;
+        case CXType_Char_U:
+        case CXType_Char_S:
+        case CXType_UChar:
+            zig_name = "u8";
+            break;
+        case CXType_UShort:
+            zig_name = "c_ushort";
+            break;
+        case CXType_UInt:
+            zig_name = "c_uint";
+            break;
+        case CXType_ULong:
+            zig_name = "c_ulong";
+            break;
+        case CXType_ULongLong:
+            zig_name = "c_ulonglong";
+            break;
+        case CXType_Short:
+            zig_name = "c_short";
+            break;
+        case CXType_Int:
+            zig_name = "c_int";
+            break;
+        case CXType_Long:
+            zig_name = "c_long";
+            break;
+        case CXType_LongLong:
+            zig_name = "c_longlong";
+            break;
+        case CXType_Float:
+            zig_name = "f32";
+            break;
+        case CXType_Double:
+            zig_name = "f64";
+            break;
+        case CXType_LongDouble:
+            zig_name = "f128";
+            break;
+
+        case CXType_Pointer:
+            {
+                // The pointee is translated first; its constness then picks
+                // between the *const and *mut forms.
+                CXType pointee = clang_getPointeeType(type);
+                Buf *pointee_zig = to_zig_type(pointee);
+                return clang_isConstQualifiedType(pointee)
+                    ? buf_sprintf("*const %s", buf_ptr(pointee_zig))
+                    : buf_sprintf("*mut %s", buf_ptr(pointee_zig));
+            }
+
+        // Records and enums are emitted under their C name without the
+        // struct/enum/const prefixes.
+        case CXType_Record:
+        case CXType_Enum:
+            return buf_sprintf("%s", prefixes_stripped(type));
+
+        // Everything below has no translation yet.
+        case CXType_WChar:
+        case CXType_Char16:
+        case CXType_Char32:
+        case CXType_UInt128:
+        case CXType_Int128:
+        case CXType_NullPtr:
+        case CXType_Overload:
+        case CXType_Dependent:
+        case CXType_ObjCId:
+        case CXType_ObjCClass:
+        case CXType_ObjCSel:
+        case CXType_Complex:
+        case CXType_BlockPointer:
+        case CXType_LValueReference:
+        case CXType_RValueReference:
+        case CXType_Typedef:
+        case CXType_ObjCInterface:
+        case CXType_ObjCObjectPointer:
+        case CXType_FunctionNoProto:
+        case CXType_FunctionProto:
+        case CXType_ConstantArray:
+        case CXType_Vector:
+        case CXType_IncompleteArray:
+        case CXType_VariableArray:
+        case CXType_DependentSizedArray:
+        case CXType_MemberPointer:
+            zig_panic("TODO");
+    }
+
+    // No case matched: the kind is not one of the values listed above.
+    if (!zig_name)
+        zig_unreachable();
+
+    return buf_create_from_str(zig_name);
+}
+
+// Decides whether a function with the given storage class is emitted.
+// None, Extern and Auto count as exported; Static, PrivateExtern,
+// OpenCLWorkGroupLocal and Register do not.
+static bool is_storage_class_export(CX_StorageClass storage_class) {
+    switch (storage_class) {
+        case CX_SC_Static:
+        case CX_SC_PrivateExtern:
+        case CX_SC_OpenCLWorkGroupLocal:
+        case CX_SC_Register:
+            return false;
+        case CX_SC_None:
+        case CX_SC_Extern:
+        case CX_SC_Auto:
+            return true;
+        case CX_SC_Invalid:
+            break;
+    }
+    // CX_SC_Invalid, or a value that is not one of the cases above.
+    zig_unreachable();
+}
+
+// Starts collecting a new function. No other function may be in progress.
+static void begin_fn(ParseH *p) {
+    assert(p->cur_fn == nullptr);
+    Fn *fresh = allocate<Fn>(1);
+    p->cur_fn = fresh;
+}
+
+// Moves the function in progress, if there is one, onto fn_list and clears
+// cur_fn. Does nothing when no function is in progress.
+static void end_fn(ParseH *p) {
+    Fn *finished = p->cur_fn;
+    if (!finished)
+        return;
+
+    p->cur_fn = nullptr;
+    p->fn_list.append(finished);
+}
+
+// clang_visitChildren callback. Records every exported function declaration,
+// together with its return type, parameter types and parameter names, in the
+// ParseH passed through client_data.
+//
+// Returns CXChildVisit_Recurse for function declarations that are collected
+// (so their parameters are visited) and for cursor kinds not listed below;
+// returns CXChildVisit_Continue otherwise.
+static enum CXChildVisitResult fn_visitor(CXCursor cursor, CXCursor parent, CXClientData client_data) {
+    ParseH *p = (ParseH*)client_data;
+    enum CXCursorKind kind = clang_getCursorKind(cursor);
+
+    switch (kind) {
+    case CXCursor_FunctionDecl:
+        {
+            CX_StorageClass storage_class = clang_Cursor_getStorageClass(cursor);
+            if (!is_storage_class_export(storage_class))
+                return CXChildVisit_Continue;
+
+            end_fn(p);
+            begin_fn(p);
+
+            CXType fn_type = clang_getCursorType(cursor);
+            if (clang_isFunctionTypeVariadic(fn_type)) {
+                zig_panic("TODO support variadic function");
+            }
+            if (clang_getFunctionTypeCallingConv(fn_type) != CXCallingConv_C) {
+                zig_panic("TODO support non c calling convention");
+            }
+            CXType return_type = clang_getResultType(fn_type);
+            p->cur_fn->return_type = to_zig_type(return_type);
+
+            // The spelling is fetched only where it is used and released once
+            // stored. buf_init_from_str is expected to copy the bytes, as
+            // buf_init_from_mem does.
+            CXString name = clang_getCursorSpelling(cursor);
+            buf_init_from_str(&p->cur_fn->name, clang_getCString(name));
+            clang_disposeString(name);
+
+            p->cur_fn->arg_count = clang_getNumArgTypes(fn_type);
+            p->cur_fn->args = allocate<Arg>(p->cur_fn->arg_count);
+
+            for (int i = 0; i < p->cur_fn->arg_count; i += 1) {
+                CXType param_type = clang_getArgType(fn_type, i);
+                p->cur_fn->args[i].type = to_zig_type(param_type);
+            }
+
+            p->arg_index = 0;
+
+            return CXChildVisit_Recurse;
+        }
+    case CXCursor_ParmDecl:
+        {
+            // Parameter declarations can also appear under other declarations
+            // (for example a typedef of a function pointer type). Only those
+            // directly under a function declaration name that function's
+            // arguments; the rest are skipped.
+            if (clang_getCursorKind(parent) != CXCursor_FunctionDecl)
+                return CXChildVisit_Continue;
+
+            assert(p->cur_fn);
+            assert(p->arg_index < p->cur_fn->arg_count);
+
+            CXString name = clang_getCursorSpelling(cursor);
+            buf_init_from_str(&p->cur_fn->args[p->arg_index].name, clang_getCString(name));
+            clang_disposeString(name);
+
+            p->arg_index += 1;
+            return CXChildVisit_Continue;
+        }
+    case CXCursor_UnexposedAttr:
+    case CXCursor_CompoundStmt:
+    case CXCursor_FieldDecl:
+        return CXChildVisit_Continue;
+    default:
+        return CXChildVisit_Recurse;
+    }
+}
+
+// Writes cur_indent spaces to the output stream.
+static void print_indent(ParseH *p) {
+    int remaining = p->cur_indent;
+    while (remaining > 0) {
+        fputc(' ', p->f);
+        remaining -= 1;
+    }
+}
+
+// Appends the space-separated words of the ZIG_PARSEH_CFLAGS environment
+// variable, if it is set, to clang_argv. Empty words (an empty value, leading,
+// trailing or repeated spaces) are skipped so clang never receives an empty
+// argument.
+static void append_env_cflags(ZigList<const char *> *clang_argv) {
+    const char *cflags = getenv("ZIG_PARSEH_CFLAGS");
+    if (!cflags)
+        return;
+
+    const char *start = cflags;
+    for (;;) {
+        const char *space = strchr(start, ' ');
+        int len = space ? (int)(space - start) : (int)strlen(start);
+        if (len > 0) {
+            // Each word gets its own NUL-terminated copy; the list keeps the
+            // pointer, so the copy is intentionally not freed here.
+            clang_argv->append(buf_ptr(buf_create_from_mem(start, len)));
+        }
+        if (!space)
+            break;
+        start = space + 1;
+    }
+}
+
+// Prints every diagnostic clang produced for `tu` to stderr and returns how
+// many there were.
+static unsigned print_diagnostics(CXTranslationUnit tu) {
+    unsigned diag_count = clang_getNumDiagnostics(tu);
+
+    for (unsigned i = 0; i < diag_count; i += 1) {
+        CXDiagnostic diagnostic = clang_getDiagnostic(tu, i);
+        CXSourceLocation location = clang_getDiagnosticLocation(diagnostic);
+
+        CXFile file;
+        unsigned line, column, offset;
+        clang_getSpellingLocation(location, &file, &line, &column, &offset);
+        CXString text = clang_getDiagnosticSpelling(diagnostic);
+        CXString file_name = clang_getFileName(file);
+
+        // A diagnostic may have no file attached; never hand NULL to %s.
+        const char *file_name_str = clang_getCString(file_name);
+        fprintf(stderr, "%s line %u, column %u: %s\n",
+                file_name_str ? file_name_str : "(unknown)",
+                line, column, clang_getCString(text));
+
+        clang_disposeString(file_name);
+        clang_disposeString(text);
+        clang_disposeDiagnostic(diagnostic);
+    }
+
+    return diag_count;
+}
+
+// Parses the C header at target_path with libclang and writes an `extern`
+// block declaring every exported function to `f`.
+//
+// clang_argv supplies extra clang arguments. It is modified: the words of
+// ZIG_PARSEH_CFLAGS and a terminating null entry are appended to it.
+//
+// Panics if the translation unit cannot be parsed. If clang reports any
+// diagnostic, all of them are printed to stderr and the process exits with
+// status 1. Nothing is written when no function was collected.
+void parse_h_file(const char *target_path, ZigList<const char *> *clang_argv, FILE *f) {
+    ParseH parse_h = {0};
+    ParseH *p = &parse_h;
+    p->f = f;
+
+    append_env_cflags(clang_argv);
+    // The terminating null entry is not counted in the argument count below.
+    clang_argv->append(nullptr);
+
+    CXIndex index = clang_createIndex(1, 0);
+    CXTranslationUnit tu;
+    if (clang_parseTranslationUnit2(index, target_path,
+            clang_argv->items, clang_argv->length - 1,
+            NULL, 0, CXTranslationUnit_None, &tu) != CXError_Success)
+    {
+        zig_panic("parse translation unit failure");
+    }
+
+    if (print_diagnostics(tu) > 0) {
+        clang_disposeTranslationUnit(tu);
+        clang_disposeIndex(index);
+        exit(1);
+    }
+
+    CXCursor cursor = clang_getTranslationUnitCursor(tu);
+    clang_visitChildren(cursor, fn_visitor, p);
+    // Flush the last function the visitor was still filling in.
+    end_fn(p);
+
+    if (p->fn_list.length) {
+        fprintf(p->f, "extern {\n");
+        p->cur_indent += indent_size;
+        for (int fn_i = 0; fn_i < p->fn_list.length; fn_i += 1) {
+            Fn *fn = p->fn_list.at(fn_i);
+            print_indent(p);
+            fprintf(p->f, "fn %s(", buf_ptr(&fn->name));
+            for (int arg_i = 0; arg_i < fn->arg_count; arg_i += 1) {
+                Arg *arg = &fn->args[arg_i];
+                fprintf(p->f, "%s: %s", buf_ptr(&arg->name), buf_ptr(arg->type));
+                if (arg_i + 1 < fn->arg_count) {
+                    fprintf(p->f, ", ");
+                }
+            }
+            fprintf(p->f, ")");
+            // A void result is expressed by omitting the return type.
+            if (!buf_eql_str(fn->return_type, "void")) {
+                fprintf(p->f, " -> %s", buf_ptr(fn->return_type));
+            }
+            fprintf(p->f, ";\n");
+        }
+        fprintf(p->f, "}\n");
+    }
+
+    // Released last so nothing printed above can refer to freed clang data.
+    clang_disposeTranslationUnit(tu);
+    clang_disposeIndex(index);
+}
src/parseh.hpp
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2015 Andrew Kelley
+ *
+ * This file is part of zig, which is MIT licensed.
+ * See http://opensource.org/licenses/MIT
+ */
+
+
+#ifndef ZIG_PARSEH_HPP
+#define ZIG_PARSEH_HPP
+
+#include "buffer.hpp"
+
+#include <stdio.h>
+
+void parse_h_file(const char *target_path, ZigList<const char *> *clang_argv, FILE *f);
+
+#endif
CMakeLists.txt
@@ -16,6 +16,9 @@ find_package(llvm)
 include_directories(${LLVM_INCLUDE_DIRS})
 link_directories(${LLVM_LIBDIRS})
 
+find_package(clang)
+include_directories(${CLANG_INCLUDE_DIR})
+
 include_directories(
     ${CMAKE_SOURCE_DIR}
     ${CMAKE_BINARY_DIR}
@@ -33,6 +36,7 @@ set(ZIG_SOURCES
     "${CMAKE_SOURCE_DIR}/src/util.cpp"
     "${CMAKE_SOURCE_DIR}/src/errmsg.cpp"
     "${CMAKE_SOURCE_DIR}/src/zig_llvm.cpp"
+    "${CMAKE_SOURCE_DIR}/src/parseh.cpp"
 )
 
 set(TEST_SOURCES
@@ -64,6 +68,7 @@ set_target_properties(zig PROPERTIES
     COMPILE_FLAGS ${EXE_CFLAGS})
 target_link_libraries(zig LINK_PUBLIC
     ${LLVM_LIBRARIES}
+    ${CLANG_LIBRARY}
 )
 install(TARGETS zig DESTINATION bin)