Commit 64c149ca19

Andrew Kelley <andrew@ziglang.org>
2020-05-20 22:13:02
fields and decls: ArrayList appending, memcpy to ast arena
This makes fields and decl ast nodes part of the Root and ContainerDecl AST nodes. Surprisingly, it's a performance regression from using a singly-linked list for these nodes: throughput: 76.5 MiB/s => 69.4 MiB/s However it has much better memory usage: maxrss: 392 KB => 77 KB It's also better API for consumers of the parser, since it is a flat list in memory.
1 parent b1bcdc9
Changed files (3)
lib/std/zig/ast.zig
@@ -6,6 +6,7 @@ const mem = std.mem;
 const Token = std.zig.Token;
 
 pub const TokenIndex = usize;
+pub const NodeIndex = usize;
 
 pub const Tree = struct {
     /// Reference to externally-owned data.
@@ -616,40 +617,62 @@ pub const Node = struct {
         }
     }
 
+    /// The decls data follows this struct in memory as an array of Node pointers.
     pub const Root = struct {
         base: Node = Node{ .id = .Root },
-        decls: DeclList,
         eof_token: TokenIndex,
+        decls_len: NodeIndex,
+
+        /// After this the caller must initialize the decls list.
+        pub fn create(allocator: *mem.Allocator, decls_len: NodeIndex, eof_token: TokenIndex) !*Root {
+            const bytes = try allocator.alignedAlloc(u8, @alignOf(Root), sizeInBytes(decls_len));
+            const self = @ptrCast(*Root, bytes.ptr);
+            self.* = .{
+                .eof_token = eof_token,
+                .decls_len = decls_len,
+            };
+            return self;
+        }
 
-        pub const DeclList = LinkedList(*Node);
+        pub fn destroy(self: *Decl, allocator: *mem.Allocator) void {
+            const bytes = @ptrCast([*]u8, self)[0..sizeInBytes(self.decls_len)];
+            allocator.free(bytes);
+        }
 
         pub fn iterate(self: *const Root) Node.Iterator {
-            return .{ .parent_node = &self.base, .index = 0, .node = self.decls.first };
+            return .{ .parent_node = &self.base, .index = 0, .node = null };
         }
 
         pub fn iterateNext(self: *const Root, it: *Node.Iterator) ?*Node {
-            const decl = it.node orelse return null;
-            it.node = decl.next;
-            return decl.data;
+            var i = it.index;
+            it.index += 1;
+
+            if (i < self.decls_len) return self.declsConst()[i];
+            return null;
+        }
+
+        pub fn decls(self: *Root) []*Node {
+            const decls_start = @ptrCast([*]u8, self) + @sizeOf(Root);
+            return @ptrCast([*]*Node, decls_start)[0..self.decls_len];
+        }
+
+        pub fn declsConst(self: *const Root) []const *Node {
+            const decls_start = @ptrCast([*]const u8, self) + @sizeOf(Root);
+            return @ptrCast([*]const *Node, decls_start)[0..self.decls_len];
         }
 
         pub fn firstToken(self: *const Root) TokenIndex {
-            if (self.decls.first) |first| {
-                return first.data.firstToken();
-            } else {
-                return self.eof_token;
-            }
+            if (self.decls_len == 0) return self.eof_token;
+            return self.declsConst()[0].firstToken();
         }
 
         pub fn lastToken(self: *const Root) TokenIndex {
-            if (self.decls.first) |first| {
-                var node = first;
-                while (true) {
-                    node = node.next orelse return node.data.lastToken();
-                }
-            } else {
-                return self.eof_token;
-            }
+            if (self.decls_len == 0) return self.eof_token;
+            return self.declsConst()[self.decls_len - 1].lastToken();
+        }
+
+        fn sizeInBytes(decls_len: NodeIndex) usize {
+            return @sizeOf(Root) + @sizeOf(*Node) * @as(usize, decls_len);
         }
     };
 
@@ -777,14 +800,12 @@ pub const Node = struct {
 
     pub const ContainerDecl = struct {
         base: Node = Node{ .id = .ContainerDecl },
-        layout_token: ?TokenIndex,
         kind_token: TokenIndex,
-        init_arg_expr: InitArg,
-        fields_and_decls: DeclList,
+        layout_token: ?TokenIndex,
         lbrace_token: TokenIndex,
         rbrace_token: TokenIndex,
-
-        pub const DeclList = Root.DeclList;
+        fields_and_decls_len: NodeIndex,
+        init_arg_expr: InitArg,
 
         pub const InitArg = union(enum) {
             None,
@@ -792,8 +813,19 @@ pub const Node = struct {
             Type: *Node,
         };
 
+        /// After this the caller must initialize the fields_and_decls list.
+        pub fn alloc(allocator: *mem.Allocator, fields_and_decls_len: NodeIndex) !*ContainerDecl {
+            const bytes = try allocator.alignedAlloc(u8, @alignOf(ContainerDecl), sizeInBytes(fields_and_decls_len));
+            return @ptrCast(*ContainerDecl, bytes.ptr);
+        }
+
+        pub fn free(self: *Decl, allocator: *mem.Allocator) void {
+            const bytes = @ptrCast([*]u8, self)[0..sizeInBytes(self.fields_and_decls_len)];
+            allocator.free(bytes);
+        }
+
         pub fn iterate(self: *const ContainerDecl) Node.Iterator {
-            return .{ .parent_node = &self.base, .index = 0, .node = self.fields_and_decls.first };
+            return .{ .parent_node = &self.base, .index = 0, .node = null };
         }
 
         pub fn iterateNext(self: *const ContainerDecl, it: *Node.Iterator) ?*Node {
@@ -808,10 +840,8 @@ pub const Node = struct {
                 .None, .Enum => {},
             }
 
-            if (it.node) |child| {
-                it.node = child.next;
-                return child.data;
-            }
+            if (i < self.fields_and_decls_len) return self.fieldsAndDeclsConst()[i];
+            i -= self.fields_and_decls_len;
 
             return null;
         }
@@ -826,6 +856,20 @@ pub const Node = struct {
         pub fn lastToken(self: *const ContainerDecl) TokenIndex {
             return self.rbrace_token;
         }
+
+        pub fn fieldsAndDecls(self: *ContainerDecl) []*Node {
+            const decls_start = @ptrCast([*]u8, self) + @sizeOf(ContainerDecl);
+            return @ptrCast([*]*Node, decls_start)[0..self.fields_and_decls_len];
+        }
+
+        pub fn fieldsAndDeclsConst(self: *const ContainerDecl) []const *Node {
+            const decls_start = @ptrCast([*]const u8, self) + @sizeOf(ContainerDecl);
+            return @ptrCast([*]const *Node, decls_start)[0..self.fields_and_decls_len];
+        }
+
+        fn sizeInBytes(fields_and_decls_len: NodeIndex) usize {
+            return @sizeOf(ContainerDecl) + @sizeOf(*Node) * @as(usize, fields_and_decls_len);
+        }
     };
 
     pub const ContainerField = struct {
@@ -1116,7 +1160,7 @@ pub const Node = struct {
         statements: StatementList,
         rbrace: TokenIndex,
 
-        pub const StatementList = Root.DeclList;
+        pub const StatementList = LinkedList(*Node);
 
         pub fn iterate(self: *const Block) Node.Iterator {
             return .{ .parent_node = &self.base, .index = 0, .node = self.statements.first };
@@ -2615,7 +2659,7 @@ pub const Node = struct {
 test "iterate" {
     var root = Node.Root{
         .base = Node{ .id = Node.Id.Root },
-        .decls = Node.Root.DeclList.init(std.testing.allocator),
+        .decls_len = 0,
         .eof_token = 0,
     };
     var base = &root.base;
lib/std/zig/parse.zig
@@ -63,16 +63,20 @@ const Parser = struct {
 
     /// Root <- skip ContainerMembers eof
     fn parseRoot(p: *Parser) Allocator.Error!*Node.Root {
-        const node = try p.arena.allocator.create(Node.Root);
-        node.* = .{
-            .decls = try parseContainerMembers(p, true),
-            // parseContainerMembers will try to skip as much
-            // invalid tokens as it can so this can only be the EOF
-            .eof_token = p.eatToken(.Eof).?,
-        };
+        const decls = try parseContainerMembers(p, true);
+        defer p.gpa.free(decls);
+
+        // parseContainerMembers will try to skip as much
+        // invalid tokens as it can so this can only be the EOF
+        const eof_token = p.eatToken(.Eof).?;
+
+        const node = try Node.Root.create(&p.arena.allocator, decls.len, eof_token);
+        std.mem.copy(*ast.Node, node.decls(), decls);
+
         return node;
     }
 
+    /// Helper function for appending elements to a singly linked list.
     fn llpush(
         p: *Parser,
         comptime T: type,
@@ -92,9 +96,9 @@ const Parser = struct {
     ///      / ContainerField COMMA ContainerMembers
     ///      / ContainerField
     ///      /
-    fn parseContainerMembers(p: *Parser, top_level: bool) !Node.Root.DeclList {
-        var list = Node.Root.DeclList{};
-        var list_it = &list.first;
+    fn parseContainerMembers(p: *Parser, top_level: bool) ![]*ast.Node {
+        var list = std.ArrayList(*ast.Node).init(p.gpa);
+        defer list.deinit();
 
         var field_state: union(enum) {
             /// no fields have been seen
@@ -110,7 +114,7 @@ const Parser = struct {
 
         while (true) {
             if (try p.parseContainerDocComments()) |node| {
-                list_it = try p.llpush(*Node, list_it, node);
+                try list.append(node);
                 continue;
             }
 
@@ -127,7 +131,7 @@ const Parser = struct {
                     field_state = .{ .end = node.firstToken() };
                 }
                 node.cast(Node.TestDecl).?.doc_comments = doc_comments;
-                list_it = try p.llpush(*Node, list_it, node);
+                try list.append(node);
                 continue;
             }
 
@@ -142,7 +146,7 @@ const Parser = struct {
                     field_state = .{ .end = node.firstToken() };
                 }
                 node.cast(Node.Comptime).?.doc_comments = doc_comments;
-                list_it = try p.llpush(*Node, list_it, node);
+                try list.append(node);
                 continue;
             }
 
@@ -173,7 +177,7 @@ const Parser = struct {
                     },
                     else => unreachable,
                 }
-                list_it = try p.llpush(*Node, list_it, node);
+                try list.append(node);
                 if (try p.parseAppendedDocComment(node.lastToken())) |appended_comment| {
                     switch (node.id) {
                         .FnProto => {},
@@ -215,7 +219,7 @@ const Parser = struct {
 
                 const field = node.cast(Node.ContainerField).?;
                 field.doc_comments = doc_comments;
-                list_it = try p.llpush(*Node, list_it, node);
+                try list.append(node);
                 const comma = p.eatToken(.Comma) orelse {
                     // try to continue parsing
                     const index = p.tok_i;
@@ -275,7 +279,7 @@ const Parser = struct {
             }
         }
 
-        return list;
+        return list.toOwnedSlice();
     }
 
     /// Attempts to find next container member by searching for certain tokens
@@ -2778,24 +2782,36 @@ const Parser = struct {
 
     /// ContainerDeclAuto <- ContainerDeclType LBRACE ContainerMembers RBRACE
     fn parseContainerDeclAuto(p: *Parser) !?*Node {
-        const node = (try p.parseContainerDeclType()) orelse return null;
+        const container_decl_type = (try p.parseContainerDeclType()) orelse return null;
         const lbrace = try p.expectToken(.LBrace);
         const members = try p.parseContainerMembers(false);
+        defer p.gpa.free(members);
         const rbrace = try p.expectToken(.RBrace);
 
-        const decl_type = node.cast(Node.ContainerDecl).?;
-        decl_type.fields_and_decls = members;
-        decl_type.lbrace_token = lbrace;
-        decl_type.rbrace_token = rbrace;
-
-        return node;
+        const node = try Node.ContainerDecl.alloc(&p.arena.allocator, members.len);
+        node.* = .{
+            .layout_token = null,
+            .kind_token = container_decl_type.kind_token,
+            .init_arg_expr = container_decl_type.init_arg_expr,
+            .fields_and_decls_len = members.len,
+            .lbrace_token = lbrace,
+            .rbrace_token = rbrace,
+        };
+        std.mem.copy(*ast.Node, node.fieldsAndDecls(), members);
+        return &node.base;
     }
 
+    /// Holds temporary data until we are ready to construct the full ContainerDecl AST node.
+    const ContainerDeclType = struct {
+        kind_token: TokenIndex,
+        init_arg_expr: ast.Node.ContainerDecl.InitArg,
+    };
+
     /// ContainerDeclType
     ///     <- KEYWORD_struct
     ///      / KEYWORD_enum (LPAREN Expr RPAREN)?
     ///      / KEYWORD_union (LPAREN (KEYWORD_enum (LPAREN Expr RPAREN)? / Expr) RPAREN)?
-    fn parseContainerDeclType(p: *Parser) !?*Node {
+    fn parseContainerDeclType(p: *Parser) !?ContainerDeclType {
         const kind_token = p.nextToken();
 
         const init_arg_expr = switch (kind_token.ptr.id) {
@@ -2838,16 +2854,10 @@ const Parser = struct {
             },
         };
 
-        const node = try p.arena.allocator.create(Node.ContainerDecl);
-        node.* = .{
-            .layout_token = null,
+        return ContainerDeclType{
             .kind_token = kind_token.index,
             .init_arg_expr = init_arg_expr,
-            .fields_and_decls = undefined, // set by caller
-            .lbrace_token = undefined, // set by caller
-            .rbrace_token = undefined, // set by caller
         };
-        return &node.base;
     }
 
     /// ByteAlign <- KEYWORD_align LPAREN Expr RPAREN
lib/std/zig/render.zig
@@ -79,9 +79,10 @@ fn renderRoot(
     }
 
     var start_col: usize = 0;
-    var it = tree.root_node.decls.first orelse return;
+    var decl_i: ast.NodeIndex = 0;
+    const root_decls = tree.root_node.decls();
     while (true) {
-        var decl = it.data;
+        var decl = root_decls[decl_i];
 
         // This loop does the following:
         //
@@ -130,14 +131,15 @@ fn renderRoot(
             token_index = decl.firstToken();
 
             while (!fmt_active) {
-                it = it.next orelse {
+                decl_i += 1;
+                if (decl_i >= root_decls.len) {
                     // If there's no next reformatted `decl`, just copy the
                     // remaining input tokens and bail out.
                     const start = tree.tokens[copy_start_token_index].start;
                     try copyFixingWhitespace(stream, tree.source[start..]);
                     return;
-                };
-                decl = it.data;
+                }
+                decl = root_decls[decl_i];
                 var decl_first_token_index = decl.firstToken();
 
                 while (token_index < decl_first_token_index) : (token_index += 1) {
@@ -178,8 +180,9 @@ fn renderRoot(
         }
 
         try renderTopLevelDecl(allocator, stream, tree, 0, &start_col, decl);
-        it = it.next orelse return;
-        try renderExtraNewline(tree, stream, &start_col, it.data);
+        decl_i += 1;
+        if (decl_i >= root_decls.len) return;
+        try renderExtraNewline(tree, stream, &start_col, root_decls[decl_i]);
     }
 }
 
@@ -1189,7 +1192,7 @@ fn renderExpression(
                 },
             }
 
-            if (container_decl.fields_and_decls.first == null) {
+            if (container_decl.fields_and_decls_len == 0) {
                 try renderToken(tree, stream, container_decl.lbrace_token, indent + indent_delta, start_col, Space.None); // {
                 return renderToken(tree, stream, container_decl.rbrace_token, indent, start_col, space); // }
             }
@@ -1203,18 +1206,18 @@ fn renderExpression(
                 break :blk tree.tokens[maybe_comma].id == .Comma;
             };
 
+            const fields_and_decls = container_decl.fieldsAndDecls();
+
             // Check if the first declaration and the { are on the same line
             const src_has_newline = !tree.tokensOnSameLine(
                 container_decl.lbrace_token,
-                container_decl.fields_and_decls.first.?.data.firstToken(),
+                fields_and_decls[0].firstToken(),
             );
 
             // We can only print all the elements in-line if all the
             // declarations inside are fields
             const src_has_only_fields = blk: {
-                var it = container_decl.fields_and_decls.first;
-                while (it) |decl_node| : (it = decl_node.next) {
-                    const decl = decl_node.data;
+                for (fields_and_decls) |decl| {
                     if (decl.id != .ContainerField) break :blk false;
                 }
                 break :blk true;
@@ -1225,14 +1228,12 @@ fn renderExpression(
                 const new_indent = indent + indent_delta;
                 try renderToken(tree, stream, container_decl.lbrace_token, new_indent, start_col, .Newline); // {
 
-                var it = container_decl.fields_and_decls.first;
-                while (it) |decl_node| : (it = decl_node.next) {
-                    const decl = decl_node.data;
+                for (fields_and_decls) |decl, i| {
                     try stream.writeByteNTimes(' ', new_indent);
                     try renderContainerDecl(allocator, stream, tree, new_indent, start_col, decl, .Newline);
 
-                    if (decl_node.next) |next_decl| {
-                        try renderExtraNewline(tree, stream, start_col, next_decl.data);
+                    if (i + 1 < fields_and_decls.len) {
+                        try renderExtraNewline(tree, stream, start_col, fields_and_decls[i + 1]);
                     }
                 }
 
@@ -1245,10 +1246,8 @@ fn renderExpression(
                 const new_indent = indent + indent_delta;
                 try stream.writeByteNTimes(' ', new_indent);
 
-                var it = container_decl.fields_and_decls.first;
-                while (it) |decl_node| : (it = decl_node.next) {
-                    const decl = decl_node.data;
-                    const space_after_decl: Space = if (decl_node.next == null) .Newline else .Space;
+                for (fields_and_decls) |decl, i| {
+                    const space_after_decl: Space = if (i + 1 >= fields_and_decls.len) .Newline else .Space;
                     try renderContainerDecl(allocator, stream, tree, new_indent, start_col, decl, space_after_decl);
                 }
 
@@ -1257,9 +1256,7 @@ fn renderExpression(
                 // All the declarations on the same line
                 try renderToken(tree, stream, container_decl.lbrace_token, indent, start_col, .Space); // {
 
-                var it = container_decl.fields_and_decls.first;
-                while (it) |decl_node| : (it = decl_node.next) {
-                    const decl = decl_node.data;
+                for (fields_and_decls) |decl| {
                     try renderContainerDecl(allocator, stream, tree, indent, start_col, decl, .Space);
                 }
             }