Commit 7c2c0e36f8

Andrew Kelley <andrew@ziglang.org>
2020-05-20 23:39:54
stage2 parser: different memory layout of ParamDecl
Instead of being its own node, it's a struct inside FnProto. Instead of FnProto having a SinglyLinkedList of ParamDecl nodes, ParamDecls are appended directly in memory after the FnProto. throughput: 72.2 MiB/s => 72.9 MiB/s maxrss: 70 KB => 68 KB Importantly, the API is improved as well since the data is arranged linearly in memory.
1 parent 5db9f30
Changed files (3)
lib/std/zig/ast.zig
@@ -477,7 +477,6 @@ pub const Node = struct {
         ErrorTag,
         AsmInput,
         AsmOutput,
-        ParamDecl,
         FieldInitializer,
     };
 
@@ -533,7 +532,6 @@ pub const Node = struct {
             switch (n.id) {
                 .Root,
                 .ContainerField,
-                .ParamDecl,
                 .Block,
                 .Payload,
                 .PointerPayload,
@@ -819,7 +817,7 @@ pub const Node = struct {
             return @ptrCast(*ContainerDecl, bytes.ptr);
         }
 
-        pub fn free(self: *Decl, allocator: *mem.Allocator) void {
+        pub fn free(self: *ContainerDecl, allocator: *mem.Allocator) void {
             const bytes = @ptrCast([*]u8, self)[0..sizeInBytes(self.fields_and_decls_len)];
             allocator.free(bytes);
         }
@@ -979,13 +977,14 @@ pub const Node = struct {
         }
     };
 
+    /// The params are directly after the FnProto in memory.
     pub const FnProto = struct {
         base: Node = Node{ .id = .FnProto },
         doc_comments: ?*DocComment,
         visib_token: ?TokenIndex,
         fn_token: TokenIndex,
         name_token: ?TokenIndex,
-        params: ParamList,
+        params_len: NodeIndex,
         return_type: ReturnType,
         var_args_token: ?TokenIndex,
         extern_export_inline_token: ?TokenIndex,
@@ -997,16 +996,75 @@ pub const Node = struct {
         is_extern_prototype: bool = false, // TODO: Remove once extern fn rewriting is
         is_async: bool = false, // TODO: remove once async fn rewriting is
 
-        pub const ParamList = LinkedList(*Node);
-
         pub const ReturnType = union(enum) {
             Explicit: *Node,
             InferErrorSet: *Node,
             Invalid: TokenIndex,
         };
 
+        pub const ParamDecl = struct {
+            doc_comments: ?*DocComment,
+            comptime_token: ?TokenIndex,
+            noalias_token: ?TokenIndex,
+            name_token: ?TokenIndex,
+            param_type: ParamType,
+
+            pub const ParamType = union(enum) {
+                var_type: *Node,
+                var_args: TokenIndex,
+                type_expr: *Node,
+            };
+
+            pub fn iterate(self: *const ParamDecl) Node.Iterator {
+                return .{ .parent_node = &self.base, .index = 0, .node = null };
+            }
+
+            pub fn iterateNext(self: *const ParamDecl, it: *Node.Iterator) ?*Node {
+                var i = it.index;
+                it.index += 1;
+
+                if (i < 1) {
+                    switch (self.param_type) {
+                        .var_args => return null,
+                        .var_type, .type_expr => |node| return node,
+                    }
+                }
+                i -= 1;
+
+                return null;
+            }
+
+            pub fn firstToken(self: *const ParamDecl) TokenIndex {
+                if (self.comptime_token) |comptime_token| return comptime_token;
+                if (self.noalias_token) |noalias_token| return noalias_token;
+                if (self.name_token) |name_token| return name_token;
+                switch (self.param_type) {
+                    .var_args => |tok| return tok,
+                    .var_type, .type_expr => |node| return node.firstToken(),
+                }
+            }
+
+            pub fn lastToken(self: *const ParamDecl) TokenIndex {
+                switch (self.param_type) {
+                    .var_args => |tok| return tok,
+                    .var_type, .type_expr => |node| return node.lastToken(),
+                }
+            }
+        };
+
+        /// After this the caller must initialize the params list.
+        pub fn alloc(allocator: *mem.Allocator, params_len: NodeIndex) !*FnProto {
+            const bytes = try allocator.alignedAlloc(u8, @alignOf(FnProto), sizeInBytes(params_len));
+            return @ptrCast(*FnProto, bytes.ptr);
+        }
+
+        pub fn free(self: *FnProto, allocator: *mem.Allocator) void {
+            const bytes = @ptrCast([*]u8, self)[0..sizeInBytes(self.params_len)];
+            allocator.free(bytes);
+        }
+
         pub fn iterate(self: *const FnProto) Node.Iterator {
-            return .{ .parent_node = &self.base, .index = 0, .node = self.params.first };
+            return .{ .parent_node = &self.base, .index = 0, .node = null };
         }
 
         pub fn iterateNext(self: *const FnProto, it: *Node.Iterator) ?*Node {
@@ -1018,11 +1076,17 @@ pub const Node = struct {
                 i -= 1;
             }
 
-            if (it.node) |param| {
-                it.index -= 1;
-                it.node = param.next;
-                return param.data;
+            if (i < self.params_len) {
+                switch (self.paramsConst()[i].param_type) {
+                    .var_type => |n| return n,
+                    .var_args => {
+                        i += 1;
+                        it.index += 1;
+                    },
+                    .type_expr => |n| return n,
+                }
             }
+            i -= self.params_len;
 
             if (self.align_expr) |align_expr| {
                 if (i < 1) return align_expr;
@@ -1064,6 +1128,20 @@ pub const Node = struct {
                 .Invalid => |tok| return tok,
             }
         }
+
+        pub fn params(self: *FnProto) []ParamDecl {
+            const decls_start = @ptrCast([*]u8, self) + @sizeOf(FnProto);
+            return @ptrCast([*]ParamDecl, decls_start)[0..self.params_len];
+        }
+
+        pub fn paramsConst(self: *const FnProto) []const ParamDecl {
+            const decls_start = @ptrCast([*]const u8, self) + @sizeOf(FnProto);
+            return @ptrCast([*]const ParamDecl, decls_start)[0..self.params_len];
+        }
+
+        fn sizeInBytes(params_len: NodeIndex) usize {
+            return @sizeOf(FnProto) + @sizeOf(ParamDecl) * @as(usize, params_len);
+        }
     };
 
     pub const AnyFrameType = struct {
@@ -1102,57 +1180,6 @@ pub const Node = struct {
         }
     };
 
-    pub const ParamDecl = struct {
-        base: Node = Node{ .id = .ParamDecl },
-        doc_comments: ?*DocComment,
-        comptime_token: ?TokenIndex,
-        noalias_token: ?TokenIndex,
-        name_token: ?TokenIndex,
-        param_type: ParamType,
-
-        pub const ParamType = union(enum) {
-            var_type: *Node,
-            var_args: TokenIndex,
-            type_expr: *Node,
-        };
-
-        pub fn iterate(self: *const ParamDecl) Node.Iterator {
-            return .{ .parent_node = &self.base, .index = 0, .node = null };
-        }
-
-        pub fn iterateNext(self: *const ParamDecl, it: *Node.Iterator) ?*Node {
-            var i = it.index;
-            it.index += 1;
-
-            if (i < 1) {
-                switch (self.param_type) {
-                    .var_args => return null,
-                    .var_type, .type_expr => |node| return node,
-                }
-            }
-            i -= 1;
-
-            return null;
-        }
-
-        pub fn firstToken(self: *const ParamDecl) TokenIndex {
-            if (self.comptime_token) |comptime_token| return comptime_token;
-            if (self.noalias_token) |noalias_token| return noalias_token;
-            if (self.name_token) |name_token| return name_token;
-            switch (self.param_type) {
-                .var_args => |tok| return tok,
-                .var_type, .type_expr => |node| return node.firstToken(),
-            }
-        }
-
-        pub fn lastToken(self: *const ParamDecl) TokenIndex {
-            switch (self.param_type) {
-                .var_args => |tok| return tok,
-                .var_type, .type_expr => |node| return node.lastToken(),
-            }
-        }
-    };
-
     pub const Block = struct {
         base: Node = Node{ .id = .Block },
         label: ?TokenIndex,
lib/std/zig/parse.zig
@@ -71,7 +71,7 @@ const Parser = struct {
         const eof_token = p.eatToken(.Eof).?;
 
         const node = try Node.Root.create(&p.arena.allocator, decls.len, eof_token);
-        std.mem.copy(*ast.Node, node.decls(), decls);
+        std.mem.copy(*Node, node.decls(), decls);
 
         return node;
     }
@@ -96,8 +96,8 @@ const Parser = struct {
     ///      / ContainerField COMMA ContainerMembers
     ///      / ContainerField
     ///      /
-    fn parseContainerMembers(p: *Parser, top_level: bool) ![]*ast.Node {
-        var list = std.ArrayList(*ast.Node).init(p.gpa);
+    fn parseContainerMembers(p: *Parser, top_level: bool) ![]*Node {
+        var list = std.ArrayList(*Node).init(p.gpa);
         defer list.deinit();
 
         var field_state: union(enum) {
@@ -522,6 +522,7 @@ const Parser = struct {
         const name_token = p.eatToken(.Identifier);
         const lparen = try p.expectToken(.LParen);
         const params = try p.parseParamDeclList(&var_args_token);
+        defer p.gpa.free(params);
         const rparen = try p.expectToken(.RParen);
         const align_expr = try p.parseByteAlign();
         const section_expr = try p.parseLinkSection();
@@ -544,13 +545,13 @@ const Parser = struct {
         else
             R{ .Explicit = return_type_expr.? };
 
-        const fn_proto_node = try p.arena.allocator.create(Node.FnProto);
+        const fn_proto_node = try Node.FnProto.alloc(&p.arena.allocator, params.len);
         fn_proto_node.* = .{
             .doc_comments = null,
             .visib_token = null,
             .fn_token = fn_token,
             .name_token = name_token,
-            .params = params,
+            .params_len = params.len,
             .return_type = return_type,
             .var_args_token = var_args_token,
             .extern_export_inline_token = null,
@@ -562,6 +563,7 @@ const Parser = struct {
             .is_extern_prototype = is_extern,
             .is_async = is_async,
         };
+        std.mem.copy(Node.FnProto.ParamDecl, fn_proto_node.params(), params);
 
         return &fn_proto_node.base;
     }
@@ -621,7 +623,7 @@ const Parser = struct {
         var type_expr: ?*Node = null;
         if (p.eatToken(.Colon)) |_| {
             if (p.eatToken(.Keyword_var)) |var_tok| {
-                const node = try p.arena.allocator.create(ast.Node.VarType);
+                const node = try p.arena.allocator.create(Node.VarType);
                 node.* = .{ .token = var_tok };
                 type_expr = &node.base;
             } else {
@@ -1936,7 +1938,7 @@ const Parser = struct {
     }
 
     /// ParamDecl <- (KEYWORD_noalias / KEYWORD_comptime)? (IDENTIFIER COLON)? ParamType
-    fn parseParamDecl(p: *Parser) !?*Node {
+    fn parseParamDecl(p: *Parser, list: *std.ArrayList(Node.FnProto.ParamDecl)) !bool {
         const doc_comments = try p.parseDocComment();
         const noalias_token = p.eatToken(.Keyword_noalias);
         const comptime_token = if (noalias_token == null) p.eatToken(.Keyword_comptime) else null;
@@ -1951,31 +1953,30 @@ const Parser = struct {
             if (noalias_token == null and
                 comptime_token == null and
                 name_token == null and
-                doc_comments == null) return null;
+                doc_comments == null) return false;
             try p.errors.append(p.gpa, .{
                 .ExpectedParamType = .{ .token = p.tok_i },
             });
             return error.ParseError;
         };
 
-        const param_decl = try p.arena.allocator.create(Node.ParamDecl);
-        param_decl.* = .{
+        (try list.addOne()).* = .{
             .doc_comments = doc_comments,
             .comptime_token = comptime_token,
             .noalias_token = noalias_token,
             .name_token = name_token,
             .param_type = param_type,
         };
-        return &param_decl.base;
+        return true;
     }
 
     /// ParamType
     ///     <- KEYWORD_var
     ///      / DOT3
     ///      / TypeExpr
-    fn parseParamType(p: *Parser) !?Node.ParamDecl.ParamType {
+    fn parseParamType(p: *Parser) !?Node.FnProto.ParamDecl.ParamType {
         // TODO cast from tuple to error union is broken
-        const P = Node.ParamDecl.ParamType;
+        const P = Node.FnProto.ParamDecl.ParamType;
         if (try p.parseVarType()) |node| return P{ .var_type = node };
         if (p.eatToken(.Ellipsis3)) |token| return P{ .var_args = token };
         if (try p.parseTypeExpr()) |node| return P{ .type_expr = node };
@@ -2587,7 +2588,7 @@ const Parser = struct {
 
                 if (p.eatToken(.Ellipsis2) != null) {
                     const end_expr = try p.parseExpr();
-                    const sentinel: ?*ast.Node = if (p.eatToken(.Colon) != null)
+                    const sentinel: ?*Node = if (p.eatToken(.Colon) != null)
                         try p.parseExpr()
                     else
                         null;
@@ -2616,7 +2617,7 @@ const Parser = struct {
             if (p.eatToken(.Period)) |period| {
                 if (try p.parseIdentifier()) |identifier| {
                     // TODO: It's a bit weird to return an InfixOp from the SuffixOp parser.
-                    // Should there be an ast.Node.SuffixOp.FieldAccess variant? Or should
+                    // Should there be an Node.SuffixOp.FieldAccess variant? Or should
                     // this grammar rule be altered?
                     const node = try p.arena.allocator.create(Node.InfixOp);
                     node.* = .{
@@ -2652,13 +2653,13 @@ const Parser = struct {
     /// ExprList <- (Expr COMMA)* Expr?
     fn parseFnCallArguments(p: *Parser) !?AnnotatedParamList {
         if (p.eatToken(.LParen) == null) return null;
-        const list = try ListParseFn(Node.FnProto.ParamList, parseExpr)(p);
+        const list = try ListParseFn(std.SinglyLinkedList(*Node), parseExpr)(p);
         const rparen = try p.expectToken(.RParen);
         return AnnotatedParamList{ .list = list, .rparen = rparen };
     }
 
     const AnnotatedParamList = struct {
-        list: Node.FnProto.ParamList, // NOTE: may also be any other type SegmentedList(*Node, 2)
+        list: std.SinglyLinkedList(*Node),
         rparen: TokenIndex,
     };
 
@@ -2797,14 +2798,14 @@ const Parser = struct {
             .lbrace_token = lbrace,
             .rbrace_token = rbrace,
         };
-        std.mem.copy(*ast.Node, node.fieldsAndDecls(), members);
+        std.mem.copy(*Node, node.fieldsAndDecls(), members);
         return &node.base;
     }
 
     /// Holds temporary data until we are ready to construct the full ContainerDecl AST node.
     const ContainerDeclType = struct {
         kind_token: TokenIndex,
-        init_arg_expr: ast.Node.ContainerDecl.InitArg,
+        init_arg_expr: Node.ContainerDecl.InitArg,
     };
 
     /// ContainerDeclType
@@ -2893,14 +2894,11 @@ const Parser = struct {
     }
 
     /// ParamDeclList <- (ParamDecl COMMA)* ParamDecl?
-    fn parseParamDeclList(p: *Parser, var_args_token: *?TokenIndex) !Node.FnProto.ParamList {
-        var list = Node.FnProto.ParamList{};
-        var list_it = &list.first;
-        var last: ?*Node = null;
-        while (try p.parseParamDecl()) |node| {
-            last = node;
-            list_it = try p.llpush(*Node, list_it, node);
+    fn parseParamDeclList(p: *Parser, var_args_token: *?TokenIndex) ![]Node.FnProto.ParamDecl {
+        var list = std.ArrayList(Node.FnProto.ParamDecl).init(p.gpa);
+        defer list.deinit();
 
+        while (try p.parseParamDecl(&list)) {
             switch (p.tokens[p.tok_i].id) {
                 .Comma => _ = p.nextToken(),
                 // all possible delimiters
@@ -2914,13 +2912,13 @@ const Parser = struct {
                 },
             }
         }
-        if (last) |node| {
-            const param_type = node.cast(Node.ParamDecl).?.param_type;
+        if (list.items.len != 0) {
+            const param_type = list.items[list.items.len - 1].param_type;
             if (param_type == .var_args) {
                 var_args_token.* = param_type.var_args;
             }
         }
-        return list;
+        return list.toOwnedSlice();
     }
 
     const NodeParseFn = fn (p: *Parser) Error!?*Node;
lib/std/zig/render.zig
@@ -1479,13 +1479,11 @@ fn renderExpression(
                 try renderToken(tree, stream, lparen, indent, start_col, Space.None); // (
 
                 // render all on one line, no trailing comma
-                var it = fn_proto.params.first;
-                while (it) |param_decl_node_node| : (it = param_decl_node_node.next) {
-                    const param_decl_node = param_decl_node_node.data;
-                    try renderParamDecl(allocator, stream, tree, indent, start_col, param_decl_node, Space.None);
+                for (fn_proto.params()) |param_decl, i| {
+                    try renderParamDecl(allocator, stream, tree, indent, start_col, param_decl, Space.None);
 
-                    if (param_decl_node_node.next != null) {
-                        const comma = tree.nextToken(param_decl_node.lastToken());
+                    if (i + 1 < fn_proto.params_len) {
+                        const comma = tree.nextToken(param_decl.lastToken());
                         try renderToken(tree, stream, comma, indent, start_col, Space.Space); // ,
                     }
                 }
@@ -1494,11 +1492,9 @@ fn renderExpression(
                 const new_indent = indent + indent_delta;
                 try renderToken(tree, stream, lparen, new_indent, start_col, Space.Newline); // (
 
-                var it = fn_proto.params.first;
-                while (it) |param_decl_node_node| : (it = param_decl_node_node.next) {
-                    const param_decl_node = param_decl_node_node.data;
+                for (fn_proto.params()) |param_decl| {
                     try stream.writeByteNTimes(' ', new_indent);
-                    try renderParamDecl(allocator, stream, tree, new_indent, start_col, param_decl_node, Space.Comma);
+                    try renderParamDecl(allocator, stream, tree, new_indent, start_col, param_decl, Space.Comma);
                 }
                 try stream.writeByteNTimes(' ', indent);
             }
@@ -2079,7 +2075,6 @@ fn renderExpression(
         .VarDecl,
         .Use,
         .TestDecl,
-        .ParamDecl,
         => unreachable,
     }
 }
@@ -2162,11 +2157,9 @@ fn renderParamDecl(
     tree: *ast.Tree,
     indent: usize,
     start_col: *usize,
-    base: *ast.Node,
+    param_decl: ast.Node.FnProto.ParamDecl,
     space: Space,
 ) (@TypeOf(stream).Error || Error)!void {
-    const param_decl = @fieldParentPtr(ast.Node.ParamDecl, "base", base);
-
     try renderDocComments(tree, stream, param_decl, indent, start_col);
 
     if (param_decl.comptime_token) |comptime_token| {