Commit d431b0fb99

Andrew Kelley <superjoe30@gmail.com>
2017-12-09 05:15:43
parse a simple variable declaration
1 parent 5ead324
Changed files (2)
src-self-hosted
std
src-self-hosted/main.zig
@@ -88,6 +88,8 @@ const Token = struct {
         Period,
         Minus,
         Arrow,
+        Colon,
+        Slash,
         Keyword_align,
         Keyword_and,
         Keyword_asm,
@@ -135,6 +137,37 @@ const Tokenizer = struct {
     buffer: []const u8,
     index: usize,
 
+    pub const Location = struct {
+        line: usize,
+        column: usize,
+        line_start: usize,
+        line_end: usize,
+    };
+
+    pub fn getTokenLocation(self: &Tokenizer, token: &const Token) -> Location {
+        var loc = Location {
+            .line = 0,
+            .column = 0,
+            .line_start = 0,
+            .line_end = 0,
+        };
+        for (self.buffer) |c, i| {
+            if (i == token.start) {
+                loc.line_end = i;
+                while (loc.line_end < self.buffer.len and self.buffer[loc.line_end] != '\n') : (loc.line_end += 1) {}
+                return loc;
+            }
+            if (c == '\n') {
+                loc.line += 1;
+                loc.column = 0;
+                loc.line_start = i;
+            } else {
+                loc.column += 1;
+            }
+        }
+        return loc;
+    }
+
     pub fn dump(self: &Tokenizer, token: &const Token) {
         warn("{} \"{}\"\n", @tagName(token.id), self.buffer[token.start..token.end]);
     }
@@ -154,6 +187,8 @@ const Tokenizer = struct {
         StringLiteral,
         StringLiteralBackslash,
         Minus,
+        Slash,
+        LineComment,
     };
 
     pub fn next(self: &Tokenizer) -> Token {
@@ -206,6 +241,11 @@ const Tokenizer = struct {
                         self.index += 1;
                         break;
                     },
+                    ':' => {
+                        result.id = Token.Id.Colon;
+                        self.index += 1;
+                        break;
+                    },
                     '%' => {
                         result.id = Token.Id.Percent;
                         self.index += 1;
@@ -229,6 +269,9 @@ const Tokenizer = struct {
                     '-' => {
                         state = State.Minus;
                     },
+                    '/' => {
+                        state = State.Slash;
+                    },
                     else => {
                         result.id = Token.Id.Invalid;
                         self.index += 1;
@@ -289,78 +332,363 @@ const Tokenizer = struct {
                         break;
                     },
                 },
+                State.Slash => switch (c) {
+                    '/' => {
+                        result.id = undefined;
+                        state = State.LineComment;
+                    },
+                    else => {
+                        result.id = Token.Id.Slash;
+                        break;
+                    },
+                },
+                State.LineComment => switch (c) {
+                    '\n' => {
+                        state = State.Start;
+                        result = Token {
+                            .id = Token.Id.Eof,
+                            .start = self.index + 1,
+                            .end = undefined,
+                        };
+                    },
+                    else => {},
+                },
             }
         }
         result.end = self.index;
+        // TODO check state when returning EOF
         return result;
     }
 };
 
+const Visibility = enum {
+    Private,
+    Pub,
+    Export,
+};
+
+const Mutability = enum {
+    Const,
+    Var,
+};
+
 const AstNode = struct {
+    id: Id,
+
+    const Id = enum {
+        Root,
+        VarDecl,
+        Identifier,
+    };
 
+    fn iterate(base: &AstNode, index: usize) -> ?&AstNode {
+        return switch (base.id) {
+            Id.Root => @fieldParentPtr(AstNodeRoot, "base", base).iterate(index),
+            Id.VarDecl => @fieldParentPtr(AstNodeVarDecl, "base", base).iterate(index),
+            Id.Identifier => @fieldParentPtr(AstNodeIdentifier, "base", base).iterate(index),
+        };
+    }
+};
+
+const AstNodeRoot = struct {
+    base: AstNode,
+    decls: ArrayList(&AstNode),
+
+    fn iterate(self: &AstNodeRoot, index: usize) -> ?&AstNode {
+        if (index < self.decls.len) {
+            return self.decls.items[index];
+        }
+        return null;
+    }
 };
 
+const AstNodeVarDecl = struct {
+    base: AstNode,
+    visib: Visibility,
+    name_token: Token,
+    eq_token: Token,
+    mut: Mutability,
+    is_comptime: bool,
+    type_node: ?&AstNode,
+    align_node: ?&AstNode,
+    init_node: ?&AstNode,
+
+    fn iterate(self: &AstNodeVarDecl, index: usize) -> ?&AstNode {
+        var i = index;
+
+        if (self.type_node) |type_node| {
+            if (i < 1) return type_node;
+            i -= 1;
+        }
+
+        if (self.align_node) |align_node| {
+            if (i < 1) return align_node;
+            i -= 1;
+        }
+
+        if (self.init_node) |init_node| {
+            if (i < 1) return init_node;
+            i -= 1;
+        }
+
+        return null;
+    }
+};
+
+const AstNodeIdentifier = struct {
+    base: AstNode,
+    name_token: Token,
+
+    fn iterate(self: &AstNodeIdentifier, index: usize) -> ?&AstNode {
+        return null;
+    }
+};
+
+error ParseError;
+
 const Parser = struct {
     tokenizer: &Tokenizer,
     allocator: &mem.Allocator,
+    put_back_tokens: [1]Token,
+    put_back_count: usize,
+    source_file_name: []const u8,
 
-    fn init(tokenizer: &Tokenizer, allocator: &mem.Allocator) -> Parser {
+    fn init(tokenizer: &Tokenizer, allocator: &mem.Allocator, source_file_name: []const u8) -> Parser {
         return Parser {
             .tokenizer = tokenizer,
             .allocator = allocator,
+            .put_back_tokens = undefined,
+            .put_back_count = 0,
+            .source_file_name = source_file_name,
         };
     }
 
-    const StackFrame = struct {
-
-    };
-
-    const State = enum {
+    const State = union(enum) {
         TopLevel, 
-        Expression,
+        TopLevelModifier: Visibility, 
+        Expression: &?&AstNode,
+        GroupedExpression: &?&AstNode,
+        PrimaryExpression: &?&AstNode,
+        TypeExpr: &?&AstNode,
+        VarDecl: &AstNodeVarDecl,
+        VarDeclAlign: &AstNodeVarDecl,
+        VarDeclEq: &AstNodeVarDecl,
+        ExpectSemicolon,
     };
 
-    fn parse(self: &Parser) -> %void {
-        var stack = ArrayList(StackFrame).init(self.allocator);
+    pub fn parse(self: &Parser) -> %&AstNode {
+        var stack = ArrayList(State).init(self.allocator);
         defer stack.deinit();
 
-        var state = State.TopLevel;
+        %return stack.append(State.TopLevel);
+
+        const root_node = %return self.createRoot();
+        // TODO %defer self.freeAst();
+
         while (true) {
-            const token = self.tokenizer.next();
+            // This gives us 1 free append that can't fail
+            const state = stack.pop();
+
             switch (state) {
-                State.TopLevel => switch (token.id) {
-                    Token.Id.Keyword_pub => {
-                        const next_token = self.tokenizer.next();
-                        switch (next_token.id) {
-                            Token.Id.Keyword_fn => {
-                                const fn_name = self.tokenizer.next();
-                                if (fn_name.id != Token.Id.Identifier) {
-                                    @panic("parse error");
-                                }
-
-                                const lparen = self.tokenizer.next();
-                                if (lparen.id != Token.Id.LParen) {
-                                    @panic("parse error");
-                                }
-                            },
-                            Token.Id.Keyword_const => @panic("TODO"),
-                            Token.Id.Keyword_var => @panic("TODO"),
-                            Token.Id.Keyword_use => @panic("TODO"),
-                            else => @panic("parse error"),
-                        }
-                    },
-                    Token.Id.Keyword_const => @panic("TODO"),
-                    Token.Id.Keyword_var => @panic("TODO"),
-                    Token.Id.Keyword_fn => @panic("TODO"),
-                    Token.Id.Keyword_export => @panic("TODO"),
-                    Token.Id.Keyword_use => @panic("TODO"),
-                    Token.Id.Keyword_comptime => @panic("TODO"),
-                    else => @panic("parse error"),
+                State.TopLevel => {
+                    const token = self.getNextToken();
+                    switch (token.id) {
+                        Token.Id.Keyword_pub => {
+                            stack.append(State {.TopLevelModifier = Visibility.Pub }) %% unreachable;
+                            continue;
+                        },
+                        Token.Id.Keyword_export => {
+                            stack.append(State {.TopLevelModifier = Visibility.Export }) %% unreachable;
+                            continue;
+                        },
+                        Token.Id.Keyword_const => {
+                            stack.append(State.TopLevel) %% unreachable;
+                            const var_decl_node = %return self.createVarDecl(Visibility.Private, Mutability.Const, false);
+                            %return root_node.decls.append(&var_decl_node.base);
+                            %return stack.append(State { .VarDecl = var_decl_node });
+                            continue;
+                        },
+                        Token.Id.Keyword_var => {
+                            stack.append(State.TopLevel) %% unreachable;
+                            const var_decl_node = %return self.createVarDecl(Visibility.Private, Mutability.Var, false);
+                            %return root_node.decls.append(&var_decl_node.base);
+                            %return stack.append(State { .VarDecl = var_decl_node });
+                            continue;
+                        },
+                        Token.Id.Eof => return &root_node.base,
+                        else => return self.parseError(token, "expected top level declaration, found {}", @tagName(token.id)),
+                    }
+                },
+                State.TopLevelModifier => |visib| {
+                    const token = self.getNextToken();
+                    switch (token.id) {
+                        Token.Id.Keyword_const => {
+                            stack.append(State.TopLevel) %% unreachable;
+                            const var_decl_node = %return self.createVarDecl(visib, Mutability.Const, false);
+                            %return root_node.decls.append(&var_decl_node.base);
+                            %return stack.append(State { .VarDecl = var_decl_node });
+                            continue;
+                        },
+                        Token.Id.Keyword_var => {
+                            stack.append(State.TopLevel) %% unreachable;
+                            const var_decl_node = %return self.createVarDecl(visib, Mutability.Var, false);
+                            %return root_node.decls.append(&var_decl_node.base);
+                            %return stack.append(State { .VarDecl = var_decl_node });
+                            continue;
+                        },
+                        else => return self.parseError(token, "expected top level declaration, found {}", @tagName(token.id)),
+                    }
+                },
+                State.VarDecl => |var_decl| {
+                    var_decl.name_token = %return self.eatToken(Token.Id.Identifier);
+                    stack.append(State { .VarDeclAlign = var_decl }) %% unreachable;
+
+                    const next_token = self.getNextToken();
+                    if (next_token.id == Token.Id.Colon) {
+                        %return stack.append(State { .TypeExpr = &var_decl.type_node });
+                        continue;
+                    }
+
+                    self.putBackToken(next_token);
+                    continue;
+                },
+                State.VarDeclAlign => |var_decl| {
+                    stack.append(State { .VarDeclEq = var_decl }) %% unreachable;
+
+                    const next_token = self.getNextToken();
+                    if (next_token.id == Token.Id.Keyword_align) {
+                        %return stack.append(State { .GroupedExpression = &var_decl.align_node });
+                        continue;
+                    }
+
+                    self.putBackToken(next_token);
+                    continue;
+                },
+                State.VarDeclEq => |var_decl| {
+                    var_decl.eq_token = %return self.eatToken(Token.Id.Equal);
+                    stack.append(State.ExpectSemicolon) %% unreachable;
+                    %return stack.append(State {
+                        .Expression = &var_decl.init_node,
+                    });
+                    continue;
+                },
+                State.ExpectSemicolon => {
+                    _ = %return self.eatToken(Token.Id.Semicolon);
+                    continue;
                 },
-                State.Expression => @panic("TODO"),
+                State.Expression => |result_ptr| {
+                    // TODO this should not jump straight to primary expression
+                    stack.append(State {.PrimaryExpression = result_ptr}) %% unreachable;
+                    continue;
+                },
+                State.PrimaryExpression => |result_ptr| {
+                    const token = self.getNextToken();
+                    switch (token.id) {
+                        Token.Id.Identifier => {
+                            const identifier = %return self.createIdentifier(token);
+                            *result_ptr = &identifier.base;
+                            continue;
+                        },
+                        else => return self.parseError(token, "expected primary expression, found {}", @tagName(token.id)),
+                    }
+                },
+                State.TypeExpr => @panic("TODO"),
+                State.GroupedExpression => @panic("TODO"),
+            }
+            unreachable;
+        }
+    }
+
+    fn createRoot(self: &Parser) -> %&AstNodeRoot {
+        const node = %return self.allocator.create(AstNodeRoot);
+        %defer self.allocator.destroy(node);
+
+        *node = AstNodeRoot {
+            .base = AstNode {.id = AstNode.Id.Root},
+            .decls = ArrayList(&AstNode).init(self.allocator),
+        };
+        return node;
+    }
+
+    fn createVarDecl(self: &Parser, visib: Visibility, mut: Mutability, is_comptime: bool) -> %&AstNodeVarDecl {
+        const node = %return self.allocator.create(AstNodeVarDecl);
+        %defer self.allocator.destroy(node);
+
+        *node = AstNodeVarDecl {
+            .base = AstNode {.id = AstNode.Id.VarDecl},
+            .visib = visib,
+            .mut = mut,
+            .is_comptime = is_comptime,
+            .type_node = null,
+            .align_node = null,
+            .init_node = null,
+            // initialized later
+            .name_token = undefined,
+            .eq_token = undefined,
+        };
+        return node;
+    }
+
+    fn createIdentifier(self: &Parser, name_token: &const Token) -> %&AstNodeIdentifier {
+        const node = %return self.allocator.create(AstNodeIdentifier);
+        %defer self.allocator.destroy(node);
+
+        *node = AstNodeIdentifier {
+            .base = AstNode {.id = AstNode.Id.Identifier},
+            .name_token = *name_token,
+        };
+        return node;
+    }
+
+    fn parseError(self: &Parser, token: &const Token, comptime fmt: []const u8, args: ...) -> error {
+        const loc = self.tokenizer.getTokenLocation(token);
+        warn("{}:{}:{}: error: " ++ fmt ++ "\n", self.source_file_name, loc.line + 1, loc.column + 1, args);
+        warn("{}\n", self.tokenizer.buffer[loc.line_start..loc.line_end]);
+        {
+            var i: usize = 0;
+            while (i < loc.column) : (i += 1) {
+                warn(" ");
             }
         }
+        {
+            const caret_count = token.end - token.start;
+            var i: usize = 0;
+            while (i < caret_count) : (i += 1) {
+                warn("~");
+            }
+        }
+        warn("\n");
+        return error.ParseError;
+    }
+
+    fn expectToken(self: &Parser, token: &const Token, id: @TagType(Token.Id)) -> %void {
+        if (token.id != id) {
+            return self.parseError(token, "expected {}, found {}", @tagName(id), @tagName(token.id));
+        }
+    }
+
+    fn eatToken(self: &Parser, id: @TagType(Token.Id)) -> %Token {
+        const token = self.getNextToken();
+        %return self.expectToken(token, id);
+        return token;
     }
+
+    fn putBackToken(self: &Parser, token: &const Token) {
+        self.put_back_tokens[self.put_back_count] = *token;
+        self.put_back_count += 1;
+    }
+
+    fn getNextToken(self: &Parser) -> Token {
+        return if (self.put_back_count != 0) {
+            const put_back_index = self.put_back_count - 1;
+            const put_back_token = self.put_back_tokens[put_back_index];
+            self.put_back_count = put_back_index;
+            put_back_token
+        } else {
+            self.tokenizer.next()
+        };
+    }
+
 };
 
 
@@ -384,8 +712,11 @@ pub fn main2() -> %void {
 
     const target_file_buf = %return io.readFileAlloc(target_file, allocator);
 
+    warn("====input:====\n");
+
     warn("{}", target_file_buf);
 
+    warn("====tokenization:====\n");
     {
         var tokenizer = Tokenizer.init(target_file_buf);
         while (true) {
@@ -397,7 +728,26 @@ pub fn main2() -> %void {
         }
     }
 
+    warn("====parse:====\n");
+
     var tokenizer = Tokenizer.init(target_file_buf);
-    var parser = Parser.init(&tokenizer, allocator);
-    %return parser.parse();
+    var parser = Parser.init(&tokenizer, allocator, target_file);
+    const node = %return parser.parse();
+
+
+    render(node, 0);
+}
+
+fn render(node: &AstNode, indent: usize) {
+    {
+        var i: usize = 0;
+        while (i < indent) : (i += 1) {
+            warn(" ");
+        }
+    }
+    warn("{}\n", @tagName(node.id));
+    var i: usize = 0;
+    while (node.iterate(i)) |child| : (i += 1) {
+        render(child, indent + 2);
+    }
 }
std/mem.zig
@@ -8,6 +8,7 @@ pub const Cmp = math.Cmp;
 pub const Allocator = struct {
     /// Allocate byte_count bytes and return them in a slice, with the
     /// slice's pointer aligned at least to alignment bytes.
+    /// The returned newly allocated memory is undefined.
     allocFn: fn (self: &Allocator, byte_count: usize, alignment: u29) -> %[]u8,
 
     /// If `new_byte_count > old_mem.len`:
@@ -17,6 +18,8 @@ pub const Allocator = struct {
     /// If `new_byte_count <= old_mem.len`:
     /// * this function must return successfully. 
     /// * alignment <= alignment of old_mem.ptr
+    ///
+    /// The returned newly allocated memory is undefined.
     reallocFn: fn (self: &Allocator, old_mem: []u8, new_byte_count: usize, alignment: u29) -> %[]u8,
 
     /// Guaranteed: `old_mem.len` is the same as what was returned from `allocFn` or `reallocFn`
@@ -40,6 +43,10 @@ pub const Allocator = struct {
     {
         const byte_count = %return math.mul(usize, @sizeOf(T), n);
         const byte_slice = %return self.allocFn(self, byte_count, alignment);
+        // This loop should get optimized out in ReleaseFast mode
+        for (byte_slice) |*byte| {
+            *byte = undefined;
+        }
         return ([]align(alignment) T)(@alignCast(alignment, byte_slice));
     }
 
@@ -56,6 +63,10 @@ pub const Allocator = struct {
 
         const byte_count = %return math.mul(usize, @sizeOf(T), n);
         const byte_slice = %return self.reallocFn(self, ([]u8)(old_mem), byte_count, alignment);
+        // This loop should get optimized out in ReleaseFast mode
+        for (byte_slice[old_mem.len..]) |*byte| {
+            *byte = undefined;
+        }
         return ([]T)(@alignCast(alignment, byte_slice));
     }