Commit df12c1328e

Vexu <git@vexu.eu>
2020-01-07 15:05:13
std-c parser typing improvements
1 parent 3ed6d7d
Changed files (3)
lib/std/c/ast.zig
@@ -23,6 +23,11 @@ pub const Tree = struct {
         arena_allocator.deinit();
         // self is destroyed
     }
+
+    pub fn slice(tree: *Tree, token: TokenIndex) []const u8 {
+        const tok = tree.tokens.at(token);
+        return tok.source.buffer[tok.start..tok.end];
+    }
 };
 
 pub const Msg = struct {
@@ -47,19 +52,19 @@ pub const Error = union(enum) {
     DuplicateQualifier: SingleTokenError("duplicate type qualifier '{}'"),
     DuplicateSpecifier: SingleTokenError("duplicate declaration specifier '{}'"),
 
-    pub fn render(self: *const Error, tokens: *Tree.TokenList, stream: var) !void {
+    pub fn render(self: *const Error, tree: *Tree, stream: var) !void {
         switch (self.*) {
-            .InvalidToken => |*x| return x.render(tokens, stream),
-            .ExpectedToken => |*x| return x.render(tokens, stream),
-            .ExpectedExpr => |*x| return x.render(tokens, stream),
-            .ExpectedStmt => |*x| return x.render(tokens, stream),
-            .ExpectedTypeName => |*x| return x.render(tokens, stream),
-            .ExpectedDeclarator => |*x| return x.render(tokens, stream),
-            .ExpectedFnBody => |*x| return x.render(tokens, stream),
-            .ExpectedInitializer => |*x| return x.render(tokens, stream),
-            .InvalidTypeSpecifier => |*x| return x.render(tokens, stream),
-            .DuplicateQualifier => |*x| return x.render(tokens, stream),
-            .DuplicateSpecifier => |*x| return x.render(tokens, stream),
+            .InvalidToken => |*x| return x.render(tree, stream),
+            .ExpectedToken => |*x| return x.render(tree, stream),
+            .ExpectedExpr => |*x| return x.render(tree, stream),
+            .ExpectedStmt => |*x| return x.render(tree, stream),
+            .ExpectedTypeName => |*x| return x.render(tree, stream),
+            .ExpectedDeclarator => |*x| return x.render(tree, stream),
+            .ExpectedFnBody => |*x| return x.render(tree, stream),
+            .ExpectedInitializer => |*x| return x.render(tree, stream),
+            .InvalidTypeSpecifier => |*x| return x.render(tree, stream),
+            .DuplicateQualifier => |*x| return x.render(tree, stream),
+            .DuplicateSpecifier => |*x| return x.render(tree, stream),
         }
     }
 
@@ -83,8 +88,8 @@ pub const Error = union(enum) {
         token: TokenIndex,
         expected_id: @TagType(Token.Id),
 
-        pub fn render(self: *const ExpectedToken, tokens: *Tree.TokenList, stream: var) !void {
-            const found_token = tokens.at(self.token);
+        pub fn render(self: *const ExpectedToken, tree: *Tree, stream: var) !void {
+            const found_token = tree.tokens.at(self.token);
             if (found_token.id == .Invalid) {
                 return stream.print("expected '{}', found invalid bytes", .{self.expected_id.symbol()});
             } else {
@@ -98,10 +103,10 @@ pub const Error = union(enum) {
         token: TokenIndex,
         type_spec: *Node.TypeSpec,
 
-        pub fn render(self: *const ExpectedToken, tokens: *Tree.TokenList, stream: var) !void {
+        pub fn render(self: *const @This(), tree: *Tree, stream: var) !void {
             try stream.write("invalid type specifier '");
-            try type_spec.spec.print(tokens, stream);
-            const token_name = tokens.at(self.token).id.symbol();
+            try self.type_spec.spec.print(tree, stream);
+            const token_name = tree.tokens.at(self.token).id.symbol();
             return stream.print("{}'", .{token_name});
         }
     };
@@ -110,14 +115,59 @@ pub const Error = union(enum) {
         return struct {
             token: TokenIndex,
 
-            pub fn render(self: *const @This(), tokens: *Tree.TokenList, stream: var) !void {
-                const actual_token = tokens.at(self.token);
+            pub fn render(self: *const @This(), tree: *Tree, stream: var) !void {
+                const actual_token = tree.tokens.at(self.token);
                 return stream.print(msg, .{actual_token.id.symbol()});
             }
         };
     }
 };
 
+pub const Type = struct {
+    pub const TypeList = std.SegmentedList(*Type, 4);
+    @"const": bool,
+    atomic: bool,
+    @"volatile": bool,
+    restrict: bool,
+
+    id: union(enum) {
+        Int: struct {
+            quals: Qualifiers,
+            id: Id,
+            is_signed: bool,
+
+            pub const Id = enum {
+                Char,
+                Short,
+                Int,
+                Long,
+                LongLong,
+            };
+        },
+        Float: struct {
+            quals: Qualifiers,
+            id: Id,
+
+            pub const Id = enum {
+                Float,
+                Double,
+                LongDouble,
+            };
+        },
+        Pointer: struct {
+            quals: Qualifiers,
+            child_type: *Type,
+        },
+        Function: struct {
+            return_type: *Type,
+            param_types: TypeList,
+        },
+        Typedef: *Type,
+        Record: *Node.RecordType,
+        Enum: *Node.EnumType,
+    },
+};
+
 pub const Node = struct {
     id: Id,
 
@@ -205,22 +255,128 @@ pub const Node = struct {
                 typename: *Node,
                 rparen: TokenIndex,
             },
+            Enum: *EnumType,
+            Record: *RecordType,
+            Typedef: struct {
+                sym: TokenIndex,
+                sym_type: *Type,
+            },
 
-            //todo
-            // @"enum",
-            // record,
-
-            Typedef: TokenIndex,
-
-            pub fn print(self: *@This(), self: *const @This(), tokens: *Tree.TokenList, stream: var) !void {
-                switch (self) {
+            pub fn print(self: *const @This(), tree: *Tree, stream: var) !void {
+                switch (self.*) {
                     .None => unreachable,
-                    else => @panic("TODO print type specifier"),
+                    .Void => |index| try stream.write(tree.slice(index)),
+                    .Char => |char| {
+                        if (char.sign) |s| {
+                            try stream.write(tree.slice(s));
+                            try stream.writeByte(' ');
+                        }
+                        try stream.write(tree.slice(char.char));
+                    },
+                    .Short => |short| {
+                        if (short.sign) |s| {
+                            try stream.write(tree.slice(s));
+                            try stream.writeByte(' ');
+                        }
+                        try stream.write(tree.slice(short.short));
+                        if (short.int) |i| {
+                            try stream.writeByte(' ');
+                            try stream.write(tree.slice(i));
+                        }
+                    },
+                    .Int => |int| {
+                        if (int.sign) |s| {
+                            try stream.write(tree.slice(s));
+                        }
+                        if (int.int) |i| {
+                            if (int.sign != null) try stream.writeByte(' ');
+                            try stream.write(tree.slice(i));
+                        }
+                    },
+                    .Long => |long| {
+                        if (long.sign) |s| {
+                            try stream.write(tree.slice(s));
+                            try stream.writeByte(' ');
+                        }
+                        try stream.write(tree.slice(long.long));
+                        if (long.longlong) |l| {
+                            try stream.writeByte(' ');
+                            try stream.write(tree.slice(l));
+                        }
+                        if (long.int) |i| {
+                            try stream.writeByte(' ');
+                            try stream.write(tree.slice(i));
+                        }
+                    },
+                    .Float => |float| {
+                        try stream.write(tree.slice(float.float));
+                        if (float.complex) |c| {
+                            try stream.writeByte(' ');
+                            try stream.write(tree.slice(c));
+                        }
+                    },
+                    .Double => |double| {
+                        if (double.long) |l| {
+                            try stream.write(tree.slice(l));
+                            try stream.writeByte(' ');
+                        }
+                        try stream.write(tree.slice(double.double));
+                        if (double.complex) |c| {
+                            try stream.writeByte(' ');
+                            try stream.write(tree.slice(c));
+                        }
+                    },
+                    .Bool => |index| try stream.write(tree.slice(index)),
+                    .Typedef => |typedef| try stream.write(tree.slice(typedef.sym)),
+                    else => try stream.print("TODO print {}", .{@tagName(self.*)}),
                 }
             }
         } = .None,
     };
 
+    pub const EnumType = struct {
+        tok: TokenIndex,
+        name: ?TokenIndex,
+        body: ?struct {
+            lbrace: TokenIndex,
+
+            /// always EnumField
+            fields: FieldList,
+            rbrace: TokenIndex,
+        },
+
+        pub const FieldList = Root.DeclList;
+    };
+
+    pub const EnumField = struct {
+        base: Node = Node{ .id = .EnumField },
+        name: TokenIndex,
+        value: ?*Node,
+    };
+
+    pub const RecordType = struct {
+        kind: union(enum) {
+            Struct: TokenIndex,
+            Union: TokenIndex,
+        },
+        name: ?TokenIndex,
+        body: ?struct {
+            lbrace: TokenIndex,
+
+            /// RecordField or StaticAssert
+            fields: FieldList,
+            rbrace: TokenIndex,
+        },
+
+        pub const FieldList = Root.DeclList;
+    };
+
+    pub const RecordField = struct {
+        base: Node = Node{ .id = .RecordField },
+        // TODO
+    };
+
     pub const TypeQual = struct {
         @"const": ?TokenIndex = null,
         atomic: ?TokenIndex = null,
lib/std/c/parse.zig
@@ -3,6 +3,7 @@ const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 const ast = std.c.ast;
 const Node = ast.Node;
+const Type = ast.Type;
 const Tree = ast.Tree;
 const TokenIndex = ast.TokenIndex;
 const Token = std.c.Token;
@@ -57,10 +58,12 @@ pub fn parse(allocator: *Allocator, source: []const u8) !*Tree {
     }
 
     var parser = Parser{
+        .symbols = Parser.SymbolList.init(allocator),
         .arena = arena,
         .it = &it,
         .tree = tree,
     };
+    defer parser.symbols.deinit();
 
     tree.root_node = try parser.root();
     return tree;
@@ -72,19 +75,35 @@ const Parser = struct {
     tree: *Tree,
 
     /// only used for scopes
-    arena_allocator: std.heap.ArenaAllocator,
-    // scopes: std.SegmentedLists(Scope),
+    symbols: SymbolList,
     warnings: bool = true,
 
-    // const Scope = struct {
-    //     types:
-    //     syms:
-    // };
+    const SymbolList = std.ArrayList(Symbol);
 
-    fn getTypeDef(parser: *Parser, tok: TokenIndex) bool {
-        return false; // TODO
-        // const token = parser.it.list.at(tok);
-        // return parser.typedefs.contains(token.slice());
+    const Symbol = struct {
+        name: []const u8,
+        ty: *Type,
+    };
+
+    fn pushScope(parser: *Parser) usize {
+        return parser.symbols.len;
+    }
+
+    fn popScope(parser: *Parser, len: usize) void {
+        parser.symbols.resize(len) catch unreachable;
+    }
+
+    fn getSymbol(parser: *Parser, tok: TokenIndex) ?*Type {
+        const name = parser.tree.slice(tok);
+        const syms = parser.symbols.toSliceConst();
+        var i = syms.len;
+        while (i > 0) : (i -= 1) {
+            if (mem.eql(u8, name, syms[i - 1].name)) {
+                return syms[i - 1].ty;
+            }
+        }
+        return null;
     }
 
     /// Root <- ExternalDeclaration* eof
@@ -264,8 +283,8 @@ const Parser = struct {
     ///     <- Keyword_void / Keyword_char / Keyword_short / Keyword_int / Keyword_long / Keyword_float / Keyword_double
     ///     / Keyword_signed / Keyword_unsigned / Keyword_bool / Keyword_complex / Keyword_imaginary /
     ///     / Keyword_atomic LPAREN TypeName RPAREN
-    ///     / EnumSpecifier
-    ///     / RecordSpecifier
+    ///     / EnumSpec
+    ///     / RecordSpec
     ///     / IDENTIFIER // typedef name
     ///     / TypeQual
     fn typeSpec(parser: *Parser, type_spec: *Node.TypeSpec) !bool {
@@ -473,22 +492,48 @@ const Parser = struct {
             } else if (parser.eatToken(.Keyword_enum)) |tok| {
                 if (type_spec.spec != .None)
                     break :blk;
-                @panic("TODO enum type");
-                // return true;
+                type_spec.spec = .{ .Enum = try parser.enumSpec(tok) };
+                return true;
             } else if (parser.eatToken(.Keyword_union) orelse parser.eatToken(.Keyword_struct)) |tok| {
                 if (type_spec.spec != .None)
                     break :blk;
-                @panic("TODO record type");
-                // return true;
+                type_spec.spec = .{ .Record = try parser.recordSpec(tok) };
+                return true;
             } else if (parser.eatToken(.Identifier)) |tok| {
-                if (!parser.getTypeDef(tok)) {
+                const ty = parser.getSymbol(tok) orelse {
                     parser.putBackToken(tok);
                     return false;
-                }
-                type_spec.spec = .{
-                    .Typedef = tok,
                 };
-                return true;
+                switch (ty.id) {
+                    .Enum => |e| {
+                        return parser.err(.{
+                            .MustUseKwToRefer = .{ .kw = e.tok, .sym = tok },
+                        });
+                    },
+                    .Record => |r| {
+                        return parser.err(.{
+                            .MustUseKwToRefer = .{
+                                .kw = switch (r.kind) {
+                                    .Struct, .Union => |kw| kw,
+                                },
+                                .sym = tok,
+                            },
+                        });
+                    },
+                    .Typedef => {
+                        type_spec.spec = .{
+                            .Typedef = .{
+                                .sym = tok,
+                                .sym_type = ty,
+                            },
+                        };
+                        return true;
+                    },
+                    else => {
+                        parser.putBackToken(tok);
+                        return false;
+                    },
+                }
             }
         }
         return parser.err(.{
@@ -567,13 +612,13 @@ const Parser = struct {
         return false;
     }
 
-    /// EnumSpecifier <- Keyword_enum IDENTIFIER? (LBRACE EnumField RBRACE)?
+    /// EnumSpec <- Keyword_enum IDENTIFIER? (LBRACE EnumField RBRACE)?
     fn enumSpecifier(parser: *Parser) !*Node {}
 
     /// EnumField <- IDENTIFIER (EQUAL ConstExpr)? (COMMA EnumField) COMMA?
     fn enumField(parser: *Parser) !*Node {}
 
-    /// RecordSpecifier <- (Keyword_struct / Keyword_union) IDENTIFIER? (LBRACE RecordField+ RBRACE)?
+    /// RecordSpec <- (Keyword_struct / Keyword_union) IDENTIFIER? (LBRACE RecordField+ RBRACE)?
     fn recordSpecifier(parser: *Parser) !*Node {}
 
     /// RecordField
@@ -581,8 +626,7 @@ const Parser = struct {
     ///     \ StaticAssert
     fn recordField(parser: *Parser) !*Node {}
 
-    /// TypeName
-    ///     <- TypeSpec* AbstractDeclarator?
+    /// TypeName <- TypeSpec* AbstractDeclarator?
     fn typeName(parser: *Parser) !*Node {
 
     /// RecordDeclarator <- Declarator? (COLON ConstExpr)?
lib/std/c/tokenizer.zig
@@ -327,6 +327,7 @@ pub const Token = struct {
     };
 
     // TODO perfect hash at comptime
+    // TODO do this in the preprocessor
     pub fn getKeyword(bytes: []const u8, pp_directive: bool) ?Id {
         var hash = std.hash_map.hashString(bytes);
         for (keywords) |kw| {
@@ -347,10 +348,6 @@ pub const Token = struct {
         return null;
     }
 
-    pub fn slice(tok: Token) []const u8 {
-        return tok.source.buffer[tok.start..tok.end];
-    }
-
     pub const NumSuffix = enum {
         None,
         F,