Commit 174baa49bd
Changed files (9)
src/main.cpp
@@ -15,7 +15,6 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
-#include <stdarg.h>
#include <limits.h>
#include <stdint.h>
#include <errno.h>
@@ -50,82 +49,6 @@ static Buf *fetch_file(FILE *f) {
return buf;
}
-void ast_error(Token *token, const char *format, ...) {
- int line = token->start_line + 1;
- int column = token->start_column + 1;
-
- va_list ap;
- va_start(ap, format);
- fprintf(stderr, "Error: Line %d, column %d: ", line, column);
- vfprintf(stderr, format, ap);
- fprintf(stderr, "\n");
- va_end(ap);
- exit(EXIT_FAILURE);
-}
-
-static const char *node_type_str(NodeType node_type) {
- switch (node_type) {
- case NodeTypeRoot:
- return "Root";
- case NodeTypeFnDecl:
- return "FnDecl";
- case NodeTypeParamDecl:
- return "ParamDecl";
- case NodeTypeType:
- return "Type";
- case NodeTypePointerType:
- return "PointerType";
- case NodeTypeBlock:
- return "Block";
- case NodeTypeStatement:
- return "Statement";
- case NodeTypeExpressionStatement:
- return "ExpressionStatement";
- case NodeTypeReturnStatement:
- return "ReturnStatement";
- case NodeTypeExpression:
- return "Expression";
- case NodeTypeFnCall:
- return "FnCall";
- }
- zig_panic("unreachable");
-}
-
-static void ast_print(AstNode *node, int indent) {
- for (int i = 0; i < indent; i += 1) {
- fprintf(stderr, " ");
- }
-
- switch (node->type) {
- case NodeTypeRoot:
- fprintf(stderr, "%s\n", node_type_str(node->type));
- for (int i = 0; i < node->data.root.fn_decls.length; i += 1) {
- AstNode *child = node->data.root.fn_decls.at(i);
- ast_print(child, indent + 2);
- }
- break;
- case NodeTypeFnDecl:
- {
- Buf *name_buf = &node->data.fn_decl.name;
- fprintf(stderr, "%s '%s'\n", node_type_str(node->type), buf_ptr(name_buf));
-
- for (int i = 0; i < node->data.fn_decl.params.length; i += 1) {
- AstNode *child = node->data.fn_decl.params.at(i);
- ast_print(child, indent + 2);
- }
-
- ast_print(node->data.fn_decl.return_type, indent + 2);
-
- ast_print(node->data.fn_decl.body, indent + 2);
-
- break;
- }
- default:
- fprintf(stderr, "%s\n", node_type_str(node->type));
- break;
- }
-}
-
char cur_dir[1024];
int main(int argc, char **argv) {
src/parser.cpp
@@ -0,0 +1,80 @@
+#include "parser.hpp"
+
+#include <stdarg.h>
+#include <stdio.h>
+
+void ast_error(Token *token, const char *format, ...) { // Print a printf-style error message for `token` to stderr, then terminate the process.
+ int line = token->start_line + 1; // reported position is the stored value plus one (presumably stored 0-based; TODO confirm)
+ int column = token->start_column + 1;
+
+ va_list ap;
+ va_start(ap, format);
+ fprintf(stderr, "Error: Line %d, column %d: ", line, column); // fixed location prefix
+ vfprintf(stderr, format, ap); // caller-supplied message, formatted with the variadic arguments
+ fprintf(stderr, "\n");
+ va_end(ap);
+ exit(EXIT_FAILURE); // does not return. NOTE(review): <stdlib.h> is not included directly in this file; presumably it arrives via parser.hpp, confirm
+}
+
+const char *node_type_str(NodeType node_type) { // Return the display name of a NodeType as a string literal.
+ switch (node_type) { // no default case: each enumerator listed below returns directly
+ case NodeTypeRoot:
+ return "Root";
+ case NodeTypeFnDecl:
+ return "FnDecl";
+ case NodeTypeParamDecl:
+ return "ParamDecl";
+ case NodeTypeType:
+ return "Type";
+ case NodeTypePointerType:
+ return "PointerType";
+ case NodeTypeBlock:
+ return "Block";
+ case NodeTypeStatement:
+ return "Statement";
+ case NodeTypeExpressionStatement:
+ return "ExpressionStatement";
+ case NodeTypeReturnStatement:
+ return "ReturnStatement";
+ case NodeTypeExpression:
+ return "Expression";
+ case NodeTypeFnCall:
+ return "FnCall";
+ }
+ zig_panic("unreachable"); // reached only when node_type matched none of the cases above
+}
+
+void ast_print(AstNode *node, int indent) { // Recursively print the subtree rooted at `node` to stderr, one node per line.
+ for (int i = 0; i < indent; i += 1) { // emit the indentation prefix: one padding string per indent unit
+ fprintf(stderr, " ");
+ }
+
+ switch (node->type) {
+ case NodeTypeRoot:
+ fprintf(stderr, "%s\n", node_type_str(node->type));
+ for (int i = 0; i < node->data.root.fn_decls.length; i += 1) { // children of the root are its function declarations
+ AstNode *child = node->data.root.fn_decls.at(i);
+ ast_print(child, indent + 2); // each nesting level adds 2 to the indent
+ }
+ break;
+ case NodeTypeFnDecl:
+ {
+ Buf *name_buf = &node->data.fn_decl.name;
+ fprintf(stderr, "%s '%s'\n", node_type_str(node->type), buf_ptr(name_buf)); // type name followed by the quoted function name
+
+ for (int i = 0; i < node->data.fn_decl.params.length; i += 1) { // parameters first, in list order
+ AstNode *child = node->data.fn_decl.params.at(i);
+ ast_print(child, indent + 2);
+ }
+
+ ast_print(node->data.fn_decl.return_type, indent + 2); // NOTE(review): return_type is not null-checked here; confirm it is always set
+
+ ast_print(node->data.fn_decl.body, indent + 2); // body is printed last
+
+ break;
+ }
+ default: // every other node type prints only its type name; its children are not visited
+ fprintf(stderr, "%s\n", node_type_str(node->type));
+ break;
+ }
+}
src/parser.hpp
@@ -82,6 +82,11 @@ struct AstNode {
__attribute__ ((format (printf, 2, 3)))
void ast_error(Token *token, const char *format, ...);
+// This function is provided by code generated by parsergen.cpp
AstNode * ast_parse(Buf *buf, ZigList<Token> *tokens);
+const char *node_type_str(NodeType node_type);
+
+void ast_print(AstNode *node, int indent);
+
#endif
src/parsergen.cpp
@@ -190,12 +190,17 @@ struct RuleNode {
enum ParserStateType { // kind tag stored in ParserState::type
ParserStateTypeError, // generated parser calls ast_error with the state's message
ParserStateTypeOk,
+ ParserStateTypeCapture, // state carrying a tuple rule's body; set by gen() after the tuple's children are generated
};
struct ParserStateError { // payload used by ParserStateTypeError states
Buf *msg; // text emitted as the ast_error message in the generated parser
};
+struct ParserStateCapture { // payload used by ParserStateTypeCapture states
+ Buf *body; // points at the originating tuple rule's body (assigned in gen())
+};
+
struct ParserState {
ParserStateType type;
// One for each token ID.
@@ -203,6 +208,7 @@ struct ParserState {
int index;
union {
ParserStateError error;
+ ParserStateCapture capture;
};
};
@@ -278,6 +284,8 @@ static void gen(Gen *g, RuleNode *node) {
RuleNode *child = node->tuple.children.at(i);
gen(g, child);
}
+ g->cur_state->type = ParserStateTypeCapture;
+ g->cur_state->capture.body = &node->tuple.body;
}
break;
case RuleNodeTypeMany:
@@ -598,7 +606,8 @@ int main(int argc, char **argv) {
g.cur_state = create_state(&g, ParserStateTypeOk);
gen(&g, g.root);
- fprintf(out_f, "/* This file is auto-generated by parsergen.cpp */\n");
+ fprintf(out_f, "/* This file is generated by parsergen.cpp */\n");
+ fprintf(out_f, "\n");
fprintf(out_f, "#include \"src/parser.hpp\"\n");
fprintf(out_f, "#include <stdio.h>\n");
@@ -616,6 +625,17 @@ int main(int argc, char **argv) {
fprintf(out_f, "static_assert(TokenId%s == %d, \"wrong token id\");\n",
buf_ptr(&token->name), token->id);
}
+ fprintf(out_f, "\n");
+
+ /* TODO
+ fprintf(out_f, "struct ParserGenNode{\n");
+ fprintf(out_f, " union {\n");
+ fprintf(out_f, " [%d];\n", biggest_tuple_len);
+ fprintf(out_f, " Token *token;\n");
+ fprintf(out_f, " };\n");
+ fprintf(out_f, "};\n");
+ fprintf(out_f, "\n");
+ */
fprintf(out_f, "AstNode * ast_parse(Buf *buf, ZigList<Token> *tokens) {\n");
@@ -644,7 +664,6 @@ int main(int argc, char **argv) {
for (int i = 0; i < g.transition_table.length; i += 1) {
ParserState *state = g.transition_table.at(i);
fprintf(out_f, " case %d:\n", i);
- fprintf(out_f, " fprintf(stderr, \"state = %%d\\n\", state);\n");
switch (state->type) {
case ParserStateTypeError:
fprintf(out_f, " ast_error(token, \"%s\");\n", buf_ptr(state->error.msg));
@@ -655,6 +674,10 @@ int main(int argc, char **argv) {
state->index, g.transition_table.length);
fprintf(out_f, " state = transition[%d][token->id];\n", state->index);
break;
+ case ParserStateTypeCapture:
+ // TODO fprintf(out_f, " %s\n", buf_ptr(state->capture.body));
+ fprintf(out_f, " state = transition[%d][token->id];\n", state->index);
+ break;
}
fprintf(out_f, " break;\n");
}
src/tokenizer.cpp
@@ -1,3 +1,10 @@
+/*
+ * Copyright (c) 2015 Andrew Kelley
+ *
+ * This file is part of zig, which is MIT licensed.
+ * See http://opensource.org/licenses/MIT
+ */
+
#include "tokenizer.hpp"
#include "util.hpp"
src/tokenizer.hpp
@@ -1,3 +1,10 @@
+/*
+ * Copyright (c) 2015 Andrew Kelley
+ *
+ * This file is part of zig, which is MIT licensed.
+ * See http://opensource.org/licenses/MIT
+ */
+
#ifndef ZIG_TOKENIZER_HPP
#define ZIG_TOKENIZER_HPP
test/hello.zig
@@ -1,6 +1,4 @@
-
-
-fn main(argc: isize, argv: *mut u8) -> isize {
+fn main(argc: i32, argv: *mut u8) -> i32 {
puts("Hello, world!\n");
return 0;
}
CMakeLists.txt
@@ -21,14 +21,15 @@ include_directories(
)
set(GRAMMAR_TXT "${CMAKE_BINARY_DIR}/simple.txt")
-set(PARSER_CPP "${CMAKE_BINARY_DIR}/parser.cpp")
+set(PARSER_GENERATED_CPP "${CMAKE_BINARY_DIR}/parser_generated.cpp")
set(ZIG_SOURCES
"${CMAKE_SOURCE_DIR}/src/main.cpp"
"${CMAKE_SOURCE_DIR}/src/util.cpp"
"${CMAKE_SOURCE_DIR}/src/buffer.cpp"
"${CMAKE_SOURCE_DIR}/src/tokenizer.cpp"
- ${PARSER_CPP}
+ "${CMAKE_SOURCE_DIR}/src/parser.cpp"
+ ${PARSER_GENERATED_CPP}
)
set(PARSERGEN_SOURCES
@@ -68,8 +69,8 @@ set_target_properties(parsergen PROPERTIES
add_custom_command(
- OUTPUT ${PARSER_CPP}
- COMMAND parsergen ARGS ${GRAMMAR_TXT} ${PARSER_CPP}
+ OUTPUT ${PARSER_GENERATED_CPP}
+ COMMAND parsergen ARGS ${GRAMMAR_TXT} ${PARSER_GENERATED_CPP}
DEPENDS ${GRAMMAR_TXT} ${PARSERGEN_SOURCES}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
)
README.md
@@ -19,7 +19,7 @@ readable, safe, optimal, and concise code to solve any computing problem.
* Eliminate the need for C headers (when using zig internally).
* Ability to declare dependencies as Git URLs with commit locking (can
provide a tag or sha1).
- * Rust-style enums.
+ * Tagged union enum type.
* Opinionated when it makes life easier.
- Tab character in source code is a compile error.
- Whitespace at the end of line is a compile error.
@@ -32,23 +32,29 @@ readable, safe, optimal, and concise code to solve any computing problem.
* Hello, world.
- Build AST
- Code Gen
+ - Produce .o file.
+ * Produce executable file instead of .o file.
+ * Add debugging symbols.
+ * Debug/Release mode.
* C style comments.
* Unit tests.
* Simple .so library
* How should the Widget use case be solved? In Genesis I'm using C++ and inheritance.
-## Grammar
+### Primitive Numeric Types:
-```
-Root : FnDecl*
-FnDecl : TokenFn TokenSymbol TokenLParen list(ParamDecl, TokenComma, 0) TokenRParen (TokenArrow Type)? Block
-ParamDecl : TokenSymbol TokenColon Type
-Type : TokenSymbol | PointerType
-PointerType : TokenStar (TokenConst | TokenMut) Type
-Block : TokenLBrace Statement* Expression? TokenRBrace
-Statement : ExpressionStatement | ReturnStatement
-ExpressionStatement : Expression TokenSemicolon
-ReturnStatement : TokenReturn Expression TokenSemicolon
-Expression : TokenNumber | TokenString | FnCall
-FnCall : TokenSymbol TokenLParen list(Expression, TokenComma, 0) TokenRParen
-```
+zig | C equivalent | Description
+-------|--------------|-------------------------------
+ i8 | int8_t | signed 8-bit integer
+ u8 | uint8_t | unsigned 8-bit integer
+ i16 | int16_t | signed 16-bit integer
+ u16 | uint16_t | unsigned 16-bit integer
+ i32 | int32_t | signed 32-bit integer
+ u32 | uint32_t | unsigned 32-bit integer
+ i64 | int64_t | signed 64-bit integer
+ u64 | uint64_t | unsigned 64-bit integer
+ f32 | float | 32-bit IEEE 754 floating point
+ f64 | double | 64-bit IEEE 754 floating point
+ f128 | long double | 128-bit IEEE 754 floating point
+ isize | ssize_t | signed pointer sized integer
+ usize | size_t | unsigned pointer sized integer