Commit d431b0fb99
src-self-hosted/main.zig
@@ -88,6 +88,8 @@ const Token = struct {
Period,
Minus,
Arrow,
+ Colon,
+ Slash,
Keyword_align,
Keyword_and,
Keyword_asm,
@@ -135,6 +137,37 @@ const Tokenizer = struct {
buffer: []const u8,
index: usize,
+ pub const Location = struct {
+ line: usize,
+ column: usize,
+ line_start: usize,
+ line_end: usize,
+ };
+
+ pub fn getTokenLocation(self: &Tokenizer, token: &const Token) -> Location {
+ var loc = Location {
+ .line = 0,
+ .column = 0,
+ .line_start = 0,
+ .line_end = 0,
+ };
+ for (self.buffer) |c, i| {
+ if (i == token.start) {
+ loc.line_end = i;
+ while (loc.line_end < self.buffer.len and self.buffer[loc.line_end] != '\n') : (loc.line_end += 1) {}
+ return loc;
+ }
+ if (c == '\n') {
+ loc.line += 1;
+ loc.column = 0;
+ loc.line_start = i;
+ } else {
+ loc.column += 1;
+ }
+ }
+ return loc;
+ }
+
pub fn dump(self: &Tokenizer, token: &const Token) {
warn("{} \"{}\"\n", @tagName(token.id), self.buffer[token.start..token.end]);
}
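
A note on getTokenLocation: it recomputes line and column by scanning the buffer from the start on every call, which is cheap enough for an error path that fires once. A hand-worked trace, with an illustrative buffer that is not from the commit:

    // buffer: "one\ntwo", token.start == 4 (the "two" token)
    //
    // i == 3 is '\n':   loc.line = 1, loc.column = 0, loc.line_start = 3
    // i == 4 == start:  loc.line_end scans forward to buffer.len == 7
    //
    // Result: line 1, column 0 -- reported as "2:1" once parseError
    // below adds 1 to each for human-readable coordinates. Note that
    // line_start lands on the '\n' itself, so the echoed source line
    // for any line past the first begins with that newline.
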
@@ -154,6 +187,8 @@ const Tokenizer = struct {
StringLiteral,
StringLiteralBackslash,
Minus,
+ Slash,
+ LineComment,
};
pub fn next(self: &Tokenizer) -> Token {
@@ -206,6 +241,11 @@ const Tokenizer = struct {
self.index += 1;
break;
},
+ ':' => {
+ result.id = Token.Id.Colon;
+ self.index += 1;
+ break;
+ },
'%' => {
result.id = Token.Id.Percent;
self.index += 1;
@@ -229,6 +269,9 @@ const Tokenizer = struct {
'-' => {
state = State.Minus;
},
+ '/' => {
+ state = State.Slash;
+ },
else => {
result.id = Token.Id.Invalid;
self.index += 1;
@@ -289,78 +332,363 @@ const Tokenizer = struct {
break;
},
},
+ State.Slash => switch (c) {
+ '/' => {
+ result.id = undefined;
+ state = State.LineComment;
+ },
+ else => {
+ result.id = Token.Id.Slash;
+ break;
+ },
+ },
+ State.LineComment => switch (c) {
+ '\n' => {
+ state = State.Start;
+ result = Token {
+ .id = Token.Id.Eof,
+ .start = self.index + 1,
+ .end = undefined,
+ };
+ },
+ else => {},
+ },
}
}
result.end = self.index;
+ // TODO check state when returning EOF
return result;
}
};
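
The two new states give '/' double duty: a lone slash becomes Token.Id.Slash, while "//" swallows everything up to end of line. A sketch of the resulting stream, assuming the pre-existing Start state skips whitespace as before:

    // input:  "a / b // trailing\nc"
    // tokens: Identifier "a", Slash, Identifier "b", Identifier "c", Eof
    //
    // Inside State.LineComment, '\n' resets to State.Start and re-seeds
    // `result` as an Eof token starting past the newline, so a comment
    // that runs to end of file falls out of the loop as a clean Eof.
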
+const Visibility = enum {
+ Private,
+ Pub,
+ Export,
+};
+
+const Mutability = enum {
+ Const,
+ Var,
+};
+
const AstNode = struct {
+ id: Id,
+
+ const Id = enum {
+ Root,
+ VarDecl,
+ Identifier,
+ };
+ fn iterate(base: &AstNode, index: usize) -> ?&AstNode {
+ return switch (base.id) {
+ Id.Root => @fieldParentPtr(AstNodeRoot, "base", base).iterate(index),
+ Id.VarDecl => @fieldParentPtr(AstNodeVarDecl, "base", base).iterate(index),
+ Id.Identifier => @fieldParentPtr(AstNodeIdentifier, "base", base).iterate(index),
+ };
+ }
+};
+
+const AstNodeRoot = struct {
+ base: AstNode,
+ decls: ArrayList(&AstNode),
+
+ fn iterate(self: &AstNodeRoot, index: usize) -> ?&AstNode {
+ if (index < self.decls.len) {
+ return self.decls.items[index];
+ }
+ return null;
+ }
};
+const AstNodeVarDecl = struct {
+ base: AstNode,
+ visib: Visibility,
+ name_token: Token,
+ eq_token: Token,
+ mut: Mutability,
+ is_comptime: bool,
+ type_node: ?&AstNode,
+ align_node: ?&AstNode,
+ init_node: ?&AstNode,
+
+ fn iterate(self: &AstNodeVarDecl, index: usize) -> ?&AstNode {
+ var i = index;
+
+ if (self.type_node) |type_node| {
+ if (i < 1) return type_node;
+ i -= 1;
+ }
+
+ if (self.align_node) |align_node| {
+ if (i < 1) return align_node;
+ i -= 1;
+ }
+
+ if (self.init_node) |init_node| {
+ if (i < 1) return init_node;
+ i -= 1;
+ }
+
+ return null;
+ }
+};
+
+const AstNodeIdentifier = struct {
+ base: AstNode,
+ name_token: Token,
+
+ fn iterate(self: &AstNodeIdentifier, index: usize) -> ?&AstNode {
+ return null;
+ }
+};
+
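
The AST nodes use the intrusive-base pattern: each concrete node embeds an AstNode, and AstNode.iterate dispatches on id, downcasting with @fieldParentPtr. A minimal sketch of the round trip (the assert is assumed to come from std.debug):

    // Upcast: take the address of the embedded field.
    //     const base: &AstNode = &var_decl_node.base;
    //
    // Downcast: recover the container from the field pointer.
    //     const decl = @fieldParentPtr(AstNodeVarDecl, "base", base);
    //     assert(decl == var_decl_node);

iterate gives every node the same child-visiting interface, which is what lets render (further down) walk the tree without knowing any concrete node type.
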
+error ParseError;
+
const Parser = struct {
tokenizer: &Tokenizer,
allocator: &mem.Allocator,
+ put_back_tokens: [1]Token,
+ put_back_count: usize,
+ source_file_name: []const u8,
- fn init(tokenizer: &Tokenizer, allocator: &mem.Allocator) -> Parser {
+ fn init(tokenizer: &Tokenizer, allocator: &mem.Allocator, source_file_name: []const u8) -> Parser {
return Parser {
.tokenizer = tokenizer,
.allocator = allocator,
+ .put_back_tokens = undefined,
+ .put_back_count = 0,
+ .source_file_name = source_file_name,
};
}
- const StackFrame = struct {
-
- };
-
- const State = enum {
+ const State = union(enum) {
TopLevel,
- Expression,
+ TopLevelModifier: Visibility,
+ Expression: &?&AstNode,
+ GroupedExpression: &?&AstNode,
+ PrimaryExpression: &?&AstNode,
+ TypeExpr: &?&AstNode,
+ VarDecl: &AstNodeVarDecl,
+ VarDeclAlign: &AstNodeVarDecl,
+ VarDeclEq: &AstNodeVarDecl,
+ ExpectSemicolon,
};
- fn parse(self: &Parser) -> %void {
- var stack = ArrayList(StackFrame).init(self.allocator);
+ pub fn parse(self: &Parser) -> %&AstNode {
+ var stack = ArrayList(State).init(self.allocator);
defer stack.deinit();
- var state = State.TopLevel;
+ %return stack.append(State.TopLevel);
+
+ const root_node = %return self.createRoot();
+ // TODO %defer self.freeAst();
+
while (true) {
- const token = self.tokenizer.next();
+ // This gives us 1 free append that can't fail
+ const state = stack.pop();
+
switch (state) {
- State.TopLevel => switch (token.id) {
- Token.Id.Keyword_pub => {
- const next_token = self.tokenizer.next();
- switch (next_token.id) {
- Token.Id.Keyword_fn => {
- const fn_name = self.tokenizer.next();
- if (fn_name.id != Token.Id.Identifier) {
- @panic("parse error");
- }
-
- const lparen = self.tokenizer.next();
- if (lparen.id != Token.Id.LParen) {
- @panic("parse error");
- }
- },
- Token.Id.Keyword_const => @panic("TODO"),
- Token.Id.Keyword_var => @panic("TODO"),
- Token.Id.Keyword_use => @panic("TODO"),
- else => @panic("parse error"),
- }
- },
- Token.Id.Keyword_const => @panic("TODO"),
- Token.Id.Keyword_var => @panic("TODO"),
- Token.Id.Keyword_fn => @panic("TODO"),
- Token.Id.Keyword_export => @panic("TODO"),
- Token.Id.Keyword_use => @panic("TODO"),
- Token.Id.Keyword_comptime => @panic("TODO"),
- else => @panic("parse error"),
+ State.TopLevel => {
+ const token = self.getNextToken();
+ switch (token.id) {
+ Token.Id.Keyword_pub => {
+ stack.append(State {.TopLevelModifier = Visibility.Pub }) %% unreachable;
+ continue;
+ },
+ Token.Id.Keyword_export => {
+ stack.append(State {.TopLevelModifier = Visibility.Export }) %% unreachable;
+ continue;
+ },
+ Token.Id.Keyword_const => {
+ stack.append(State.TopLevel) %% unreachable;
+ const var_decl_node = %return self.createVarDecl(Visibility.Private, Mutability.Const, false);
+ %return root_node.decls.append(&var_decl_node.base);
+ %return stack.append(State { .VarDecl = var_decl_node });
+ continue;
+ },
+ Token.Id.Keyword_var => {
+ stack.append(State.TopLevel) %% unreachable;
+ const var_decl_node = %return self.createVarDecl(Visibility.Private, Mutability.Var, false);
+ %return root_node.decls.append(&var_decl_node.base);
+ %return stack.append(State { .VarDecl = var_decl_node });
+ continue;
+ },
+ Token.Id.Eof => return &root_node.base,
+ else => return self.parseError(token, "expected top level declaration, found {}", @tagName(token.id)),
+ }
+ },
+ State.TopLevelModifier => |visib| {
+ const token = self.getNextToken();
+ switch (token.id) {
+ Token.Id.Keyword_const => {
+ stack.append(State.TopLevel) %% unreachable;
+ const var_decl_node = %return self.createVarDecl(visib, Mutability.Const, false);
+ %return root_node.decls.append(&var_decl_node.base);
+ %return stack.append(State { .VarDecl = var_decl_node });
+ continue;
+ },
+ Token.Id.Keyword_var => {
+ stack.append(State.TopLevel) %% unreachable;
+ const var_decl_node = %return self.createVarDecl(visib, Mutability.Var, false);
+ %return root_node.decls.append(&var_decl_node.base);
+ %return stack.append(State { .VarDecl = var_decl_node });
+ continue;
+ },
+ else => return self.parseError(token, "expected top level declaration, found {}", @tagName(token.id)),
+ }
+ },
+ State.VarDecl => |var_decl| {
+ var_decl.name_token = %return self.eatToken(Token.Id.Identifier);
+ stack.append(State { .VarDeclAlign = var_decl }) %% unreachable;
+
+ const next_token = self.getNextToken();
+ if (next_token.id == Token.Id.Colon) {
+ %return stack.append(State { .TypeExpr = &var_decl.type_node });
+ continue;
+ }
+
+ self.putBackToken(next_token);
+ continue;
+ },
+ State.VarDeclAlign => |var_decl| {
+ stack.append(State { .VarDeclEq = var_decl }) %% unreachable;
+
+ const next_token = self.getNextToken();
+ if (next_token.id == Token.Id.Keyword_align) {
+ %return stack.append(State { .GroupedExpression = &var_decl.align_node });
+ continue;
+ }
+
+ self.putBackToken(next_token);
+ continue;
+ },
+ State.VarDeclEq => |var_decl| {
+ var_decl.eq_token = %return self.eatToken(Token.Id.Equal);
+ stack.append(State.ExpectSemicolon) %% unreachable;
+ %return stack.append(State {
+ .Expression = &var_decl.init_node,
+ });
+ continue;
+ },
+ State.ExpectSemicolon => {
+ _ = %return self.eatToken(Token.Id.Semicolon);
+ continue;
},
- State.Expression => @panic("TODO"),
+ State.Expression => |result_ptr| {
+ // TODO this should not jump straight to primary expression
+ stack.append(State {.PrimaryExpression = result_ptr}) %% unreachable;
+ continue;
+ },
+ State.PrimaryExpression => |result_ptr| {
+ const token = self.getNextToken();
+ switch (token.id) {
+ Token.Id.Identifier => {
+ const identifier = %return self.createIdentifier(token);
+ *result_ptr = &identifier.base;
+ continue;
+ },
+ else => return self.parseError(token, "expected primary expression, found {}", @tagName(token.id)),
+ }
+ },
+ State.TypeExpr => @panic("TODO"),
+ State.GroupedExpression => @panic("TODO"),
+ }
+ unreachable;
+ }
+ }
+
+ fn createRoot(self: &Parser) -> %&AstNodeRoot {
+ const node = %return self.allocator.create(AstNodeRoot);
+ %defer self.allocator.destroy(node);
+
+ *node = AstNodeRoot {
+ .base = AstNode {.id = AstNode.Id.Root},
+ .decls = ArrayList(&AstNode).init(self.allocator),
+ };
+ return node;
+ }
+
+ fn createVarDecl(self: &Parser, visib: Visibility, mut: Mutability, is_comptime: bool) -> %&AstNodeVarDecl {
+ const node = %return self.allocator.create(AstNodeVarDecl);
+ %defer self.allocator.destroy(node);
+
+ *node = AstNodeVarDecl {
+ .base = AstNode {.id = AstNode.Id.VarDecl},
+ .visib = visib,
+ .mut = mut,
+ .is_comptime = is_comptime,
+ .type_node = null,
+ .align_node = null,
+ .init_node = null,
+ // initialized later
+ .name_token = undefined,
+ .eq_token = undefined,
+ };
+ return node;
+ }
+
+ fn createIdentifier(self: &Parser, name_token: &const Token) -> %&AstNodeIdentifier {
+ const node = %return self.allocator.create(AstNodeIdentifier);
+ %defer self.allocator.destroy(node);
+
+ *node = AstNodeIdentifier {
+ .base = AstNode {.id = AstNode.Id.Identifier},
+ .name_token = *name_token,
+ };
+ return node;
+ }
+
+ fn parseError(self: &Parser, token: &const Token, comptime fmt: []const u8, args: ...) -> error {
+ const loc = self.tokenizer.getTokenLocation(token);
+ warn("{}:{}:{}: error: " ++ fmt ++ "\n", self.source_file_name, loc.line + 1, loc.column + 1, args);
+ warn("{}\n", self.tokenizer.buffer[loc.line_start..loc.line_end]);
+ {
+ var i: usize = 0;
+ while (i < loc.column) : (i += 1) {
+ warn(" ");
}
}
+ {
+ const caret_count = token.end - token.start;
+ var i: usize = 0;
+ while (i < caret_count) : (i += 1) {
+ warn("~");
+ }
+ }
+ warn("\n");
+ return error.ParseError;
+ }
+
+ fn expectToken(self: &Parser, token: &const Token, id: @TagType(Token.Id)) -> %void {
+ if (token.id != id) {
+ return self.parseError(token, "expected {}, found {}", @tagName(id), @tagName(token.id));
+ }
+ }
+
+ fn eatToken(self: &Parser, id: @TagType(Token.Id)) -> %Token {
+ const token = self.getNextToken();
+ %return self.expectToken(token, id);
+ return token;
}
+
+ fn putBackToken(self: &Parser, token: &const Token) {
+ self.put_back_tokens[self.put_back_count] = *token;
+ self.put_back_count += 1;
+ }
+
+ fn getNextToken(self: &Parser) -> Token {
+ return if (self.put_back_count != 0) {
+ const put_back_index = self.put_back_count - 1;
+ const put_back_token = self.put_back_tokens[put_back_index];
+ self.put_back_count = put_back_index;
+ put_back_token
+ } else {
+ self.tokenizer.next()
+ };
+ }
+
};
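
The parser swaps the old recursive sketch for an explicit state stack: each loop iteration pops one State and pushes whatever must happen next. Popping is also what makes the `stack.append(...) %% unreachable` calls safe: the pop just vacated one slot of capacity, so exactly one append cannot fail, which is the "1 free append" the comment refers to. The one-slot put_back buffer provides the single token of lookahead needed after a declaration name. A hand trace over illustrative input:

    // input: "const a = b;"
    //
    // pop TopLevel            "const"; push TopLevel, VarDecl
    // pop VarDecl             eat "a"; push VarDeclAlign; next is "=",
    //                         not ':', so put it back
    // pop VarDeclAlign        push VarDeclEq; "=" is not align, put back
    // pop VarDeclEq           eat "="; push ExpectSemicolon, Expression
    // pop Expression          push PrimaryExpression
    // pop PrimaryExpression   eat "b"; attach an AstNodeIdentifier
    // pop ExpectSemicolon     eat ";"
    // pop TopLevel            Eof; return &root_node.base

parseError echoes the offending line and draws a '~' under each character of the bad token. For a hypothetical file test.zig containing "const = b;", eatToken(Token.Id.Identifier) would report roughly:

    test.zig:1:7: error: expected Identifier, found Equal
    const = b;
          ~
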
@@ -384,8 +712,11 @@ pub fn main2() -> %void {
const target_file_buf = %return io.readFileAlloc(target_file, allocator);
+ warn("====input:====\n");
+
warn("{}", target_file_buf);
+ warn("====tokenization:====\n");
{
var tokenizer = Tokenizer.init(target_file_buf);
while (true) {
@@ -397,7 +728,26 @@ pub fn main2() -> %void {
}
}
+ warn("====parse:====\n");
+
var tokenizer = Tokenizer.init(target_file_buf);
- var parser = Parser.init(&tokenizer, allocator);
- %return parser.parse();
+ var parser = Parser.init(&tokenizer, allocator, target_file);
+ const node = %return parser.parse();
+
+
+ render(node, 0);
+}
+
+fn render(node: &AstNode, indent: usize) {
+ {
+ var i: usize = 0;
+ while (i < indent) : (i += 1) {
+ warn(" ");
+ }
+ }
+ warn("{}\n", @tagName(node.id));
+ var i: usize = 0;
+ while (node.iterate(i)) |child| : (i += 1) {
+ render(child, indent + 2);
+ }
}
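
render pairs @tagName with the iterate interface to dump the tree, two spaces of indent per level. For the "const a = b;" example above it would print:

    Root
      VarDecl
        Identifier
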
std/mem.zig
@@ -8,6 +8,7 @@ pub const Cmp = math.Cmp;
pub const Allocator = struct {
/// Allocate byte_count bytes and return them in a slice, with the
/// slice's pointer aligned at least to alignment bytes.
+ /// The returned newly allocated memory is undefined.
allocFn: fn (self: &Allocator, byte_count: usize, alignment: u29) -> %[]u8,
/// If `new_byte_count > old_mem.len`:
@@ -17,6 +18,8 @@ pub const Allocator = struct {
/// If `new_byte_count <= old_mem.len`:
/// * this function must return successfully.
/// * alignment <= alignment of old_mem.ptr
+ ///
+ /// The returned newly allocated memory is undefined.
reallocFn: fn (self: &Allocator, old_mem: []u8, new_byte_count: usize, alignment: u29) -> %[]u8,
/// Guaranteed: `old_mem.len` is the same as what was returned from `allocFn` or `reallocFn`
@@ -40,6 +43,10 @@ pub const Allocator = struct {
{
const byte_count = %return math.mul(usize, @sizeOf(T), n);
const byte_slice = %return self.allocFn(self, byte_count, alignment);
+ // This loop should get optimized out in ReleaseFast mode
+ for (byte_slice) |*byte| {
+ *byte = undefined;
+ }
return ([]align(alignment) T)(@alignCast(alignment, byte_slice));
}
@@ -56,6 +63,10 @@ pub const Allocator = struct {
const byte_count = %return math.mul(usize, @sizeOf(T), n);
const byte_slice = %return self.reallocFn(self, ([]u8)(old_mem), byte_count, alignment);
+ // This loop should get optimized out in ReleaseFast mode
+ for (byte_slice[old_mem.len..]) |*byte| {
+ *byte = undefined;
+ }
return ([]T)(@alignCast(alignment, byte_slice));
}
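
The std/mem.zig change spells out the allocator contract: freshly allocated bytes, and the grown tail after a realloc, are undefined, and the wrapper loops stamp them as such. In Debug builds Zig fills undefined memory with a recognizable 0xaa byte pattern, turning read-before-write bugs into something reproducible; in ReleaseFast the stores are dead and the loops fold away, as the comments say. A usage sketch, assuming an allocator `a` and the generic alloc wrapper shown above:

    // const bytes = %return a.alloc(u8, 4);
    // // Debug: bytes hold 0xaa, never stale memory from a prior owner.
    // // ReleaseFast: reading bytes before writing them is undefined.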