Commit f8f1c6ac06
Changed files (2)
lib/std/zig/parse.zig
tools/extract-grammar.zig
lib/std/zig/parse.zig
@@ -50,22 +50,7 @@ pub fn parse(gpa: Allocator, source: [:0]const u8) Allocator.Error!Ast {
const estimated_node_count = (tokens.len + 2) / 2;
try parser.nodes.ensureTotalCapacity(gpa, estimated_node_count);
- // Root node must be index 0.
- // Root <- skip ContainerMembers eof
- parser.nodes.appendAssumeCapacity(.{
- .tag = .root,
- .main_token = 0,
- .data = undefined,
- });
- const root_members = try parser.parseContainerMembers();
- const root_decls = try root_members.toSpan(&parser);
- if (parser.token_tags[parser.tok_i] != .eof) {
- try parser.warnExpected(.eof);
- }
- parser.nodes.items(.data)[0] = .{
- .lhs = root_decls.start,
- .rhs = root_decls.end,
- };
+ try parser.parseRoot();
// TODO experiment with compacting the MultiArrayList slices here
return Ast{
@@ -237,12 +222,33 @@ const Parser = struct {
return error.ParseError;
}
+ /// Root <- skip container_doc_comment? ContainerMembers eof
+ fn parseRoot(p: *Parser) !void {
+ // Root node must be index 0.
+ p.nodes.appendAssumeCapacity(.{
+ .tag = .root,
+ .main_token = 0,
+ .data = undefined,
+ });
+ const root_members = try p.parseContainerMembers();
+ const root_decls = try root_members.toSpan(p);
+ if (p.token_tags[p.tok_i] != .eof) {
+ try p.warnExpected(.eof);
+ }
+ p.nodes.items(.data)[0] = .{
+ .lhs = root_decls.start,
+ .rhs = root_decls.end,
+ };
+ }
+
/// ContainerMembers <- ContainerDeclarations (ContainerField COMMA)* (ContainerField / ContainerDeclarations)
+ ///
/// ContainerDeclarations
/// <- TestDecl ContainerDeclarations
/// / ComptimeDecl ContainerDeclarations
/// / doc_comment? KEYWORD_pub? Decl ContainerDeclarations
/// /
+ ///
/// ComptimeDecl <- KEYWORD_comptime Block
fn parseContainerMembers(p: *Parser) !Members {
const scratch_top = p.scratch.items.len;
@@ -887,7 +893,9 @@ const Parser = struct {
}
}
- /// ContainerField <- KEYWORD_comptime? IDENTIFIER (COLON TypeExpr ByteAlign?)? (EQUAL Expr)?
+ /// ContainerField
+ /// <- doc_comment? KEYWORD_comptime? IDENTIFIER (COLON TypeExpr)? ByteAlign? (EQUAL Expr)?
+ /// / doc_comment? KEYWORD_comptime? (IDENTIFIER COLON)? !KEYWORD_fn TypeExpr ByteAlign? (EQUAL Expr)?
fn expectContainerField(p: *Parser) !Node.Index {
var main_token = p.tok_i;
_ = p.eatToken(.keyword_comptime);
@@ -1173,6 +1181,7 @@ const Parser = struct {
}
/// ForPrefix <- KEYWORD_for LPAREN Expr RPAREN PtrIndexPayload
+ ///
/// ForStatement
/// <- ForPrefix BlockExpr ( KEYWORD_else Statement )?
/// / ForPrefix AssignExpr ( SEMICOLON / KEYWORD_else Statement )
@@ -1234,6 +1243,7 @@ const Parser = struct {
}
/// WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr?
+ ///
/// WhileStatement
/// <- WhilePrefix BlockExpr ( KEYWORD_else Payload? Statement )?
/// / WhilePrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement )
@@ -1368,13 +1378,18 @@ const Parser = struct {
}
/// AssignExpr <- Expr (AssignOp Expr)?
+ ///
/// AssignOp
/// <- ASTERISKEQUAL
+ /// / ASTERISKPIPEEQUAL
/// / SLASHEQUAL
/// / PERCENTEQUAL
/// / PLUSEQUAL
+ /// / PLUSPIPEEQUAL
/// / MINUSEQUAL
+ /// / MINUSPIPEEQUAL
/// / LARROW2EQUAL
+ /// / LARROW2PIPEEQUAL
/// / RARROW2EQUAL
/// / AMPERSANDEQUAL
/// / CARETEQUAL
@@ -1553,6 +1568,7 @@ const Parser = struct {
}
/// PrefixExpr <- PrefixOp* PrimaryExpr
+ ///
/// PrefixOp
/// <- EXCLAMATIONMARK
/// / MINUS
@@ -1591,17 +1607,21 @@ const Parser = struct {
}
/// TypeExpr <- PrefixTypeOp* ErrorUnionExpr
+ ///
/// PrefixTypeOp
/// <- QUESTIONMARK
/// / KEYWORD_anyframe MINUSRARROW
/// / SliceTypeStart (ByteAlign / AddrSpace / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)*
/// / PtrTypeStart (AddrSpace / KEYWORD_align LPAREN Expr (COLON Expr COLON Expr)? RPAREN / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)*
/// / ArrayTypeStart
+ ///
/// SliceTypeStart <- LBRACKET (COLON Expr)? RBRACKET
+ ///
/// PtrTypeStart
/// <- ASTERISK
/// / ASTERISK2
/// / LBRACKET ASTERISK (LETTERC / COLON Expr)? RBRACKET
+ ///
/// ArrayTypeStart <- LBRACKET Expr (COLON Expr)? RBRACKET
fn parseTypeExpr(p: *Parser) Error!Node.Index {
switch (p.token_tags[p.tok_i]) {
@@ -2068,6 +2088,7 @@ const Parser = struct {
}
/// ForPrefix <- KEYWORD_for LPAREN Expr RPAREN PtrIndexPayload
+ ///
/// ForExpr <- ForPrefix Expr (KEYWORD_else Expr)?
fn parseForExpr(p: *Parser) !Node.Index {
const for_token = p.eatToken(.keyword_for) orelse return null_node;
@@ -2103,6 +2124,7 @@ const Parser = struct {
}
/// WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr?
+ ///
/// WhileExpr <- WhilePrefix Expr (KEYWORD_else Payload? Expr)?
fn parseWhileExpr(p: *Parser) !Node.Index {
const while_token = p.eatToken(.keyword_while) orelse return null_node;
@@ -2154,6 +2176,7 @@ const Parser = struct {
}
/// CurlySuffixExpr <- TypeExpr InitList?
+ ///
/// InitList
/// <- LBRACE FieldInit (COMMA FieldInit)* COMMA? RBRACE
/// / LBRACE Expr (COMMA Expr)* COMMA? RBRACE
@@ -2272,7 +2295,9 @@ const Parser = struct {
/// SuffixExpr
/// <- KEYWORD_async PrimaryTypeExpr SuffixOp* FnCallArguments
/// / PrimaryTypeExpr (SuffixOp / FnCallArguments)*
+ ///
/// FnCallArguments <- LPAREN ExprList RPAREN
+ ///
/// ExprList <- (Expr COMMA)* Expr?
fn parseSuffixExpr(p: *Parser) !Node.Index {
if (p.eatToken(.keyword_async)) |_| {
@@ -2410,18 +2435,26 @@ const Parser = struct {
/// / KEYWORD_unreachable
/// / STRINGLITERAL
/// / SwitchExpr
+ ///
/// ContainerDecl <- (KEYWORD_extern / KEYWORD_packed)? ContainerDeclAuto
- /// ContainerDeclAuto <- ContainerDeclType LBRACE ContainerMembers RBRACE
+ ///
+ /// ContainerDeclAuto <- ContainerDeclType LBRACE container_doc_comment? ContainerMembers RBRACE
+ ///
/// InitList
/// <- LBRACE FieldInit (COMMA FieldInit)* COMMA? RBRACE
/// / LBRACE Expr (COMMA Expr)* COMMA? RBRACE
/// / LBRACE RBRACE
+ ///
/// ErrorSetDecl <- KEYWORD_error LBRACE IdentifierList RBRACE
+ ///
/// GroupedExpr <- LPAREN Expr RPAREN
+ ///
/// IfTypeExpr <- IfPrefix TypeExpr (KEYWORD_else Payload? TypeExpr)?
+ ///
/// LabeledTypeExpr
/// <- BlockLabel Block
/// / BlockLabel? LoopTypeExpr
+ ///
/// LoopTypeExpr <- KEYWORD_inline? (ForTypeExpr / WhileTypeExpr)
fn parsePrimaryTypeExpr(p: *Parser) !Node.Index {
switch (p.token_tags[p.tok_i]) {
@@ -2751,6 +2784,7 @@ const Parser = struct {
}
/// ForPrefix <- KEYWORD_for LPAREN Expr RPAREN PtrIndexPayload
+ ///
/// ForTypeExpr <- ForPrefix TypeExpr (KEYWORD_else TypeExpr)?
fn parseForTypeExpr(p: *Parser) !Node.Index {
const for_token = p.eatToken(.keyword_for) orelse return null_node;
@@ -2786,6 +2820,7 @@ const Parser = struct {
}
/// WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr?
+ ///
/// WhileTypeExpr <- WhilePrefix TypeExpr (KEYWORD_else Payload? TypeExpr)?
fn parseWhileTypeExpr(p: *Parser) !Node.Index {
const while_token = p.eatToken(.keyword_while) orelse return null_node;
@@ -2861,11 +2896,17 @@ const Parser = struct {
}
/// AsmExpr <- KEYWORD_asm KEYWORD_volatile? LPAREN Expr AsmOutput? RPAREN
+ ///
/// AsmOutput <- COLON AsmOutputList AsmInput?
+ ///
/// AsmInput <- COLON AsmInputList AsmClobbers?
+ ///
/// AsmClobbers <- COLON StringList
+ ///
/// StringList <- (STRINGLITERAL COMMA)* STRINGLITERAL?
+ ///
/// AsmOutputList <- (AsmOutputItem COMMA)* AsmOutputItem?
+ ///
/// AsmInputList <- (AsmInputItem COMMA)* AsmInputItem?
fn expectAsmExpr(p: *Parser) !Node.Index {
const asm_token = p.assertToken(.keyword_asm);
@@ -3069,15 +3110,17 @@ const Parser = struct {
return expr_node;
}
- /// ParamDecl
- /// <- (KEYWORD_noalias / KEYWORD_comptime)? (IDENTIFIER COLON)? ParamType
- /// / DOT3
- /// ParamType
- /// <- Keyword_anytype
- /// / TypeExpr
/// This function can return null nodes and then still return nodes afterwards,
/// such as in the case of anytype and `...`. Caller must look for rparen to find
/// out when there are no more param decls left.
+ ///
+ /// ParamDecl
+ /// <- doc_comment? (KEYWORD_noalias / KEYWORD_comptime)? (IDENTIFIER COLON)? ParamType
+ /// / DOT3
+ ///
+ /// ParamType
+ /// <- KEYWORD_anytype
+ /// / TypeExpr
fn expectParamDecl(p: *Parser) !Node.Index {
_ = try p.eatDocComments();
switch (p.token_tags[p.tok_i]) {
@@ -3119,8 +3162,9 @@ const Parser = struct {
return identifier;
}
- /// PtrIndexPayload <- PIPE ASTERISK? IDENTIFIER (COMMA IDENTIFIER)? PIPE
/// Returns the first identifier token, if any.
+ ///
+ /// PtrIndexPayload <- PIPE ASTERISK? IDENTIFIER (COMMA IDENTIFIER)? PIPE
fn parsePtrIndexPayload(p: *Parser) !TokenIndex {
_ = p.eatToken(.pipe) orelse return @as(TokenIndex, 0);
_ = p.eatToken(.asterisk);
@@ -3133,6 +3177,7 @@ const Parser = struct {
}
/// SwitchProng <- KEYWORD_inline? SwitchCase EQUALRARROW PtrIndexPayload? AssignExpr
+ ///
/// SwitchCase
/// <- SwitchItem (COMMA SwitchItem)* COMMA?
/// / KEYWORD_else
@@ -3385,6 +3430,7 @@ const Parser = struct {
}
/// Caller must have already verified the first token.
+ ///
/// ContainerDeclAuto <- ContainerDeclType LBRACE container_doc_comment? ContainerMembers RBRACE
///
/// ContainerDeclType
@@ -3556,6 +3602,7 @@ const Parser = struct {
}
/// Holds temporary data until we are ready to construct the full ContainerDecl AST node.
+ ///
/// ByteAlign <- KEYWORD_align LPAREN Expr RPAREN
fn parseByteAlign(p: *Parser) !Node.Index {
_ = p.eatToken(.keyword_align) orelse return null_node;
@@ -3625,6 +3672,7 @@ const Parser = struct {
}
/// FnCallArguments <- LPAREN ExprList RPAREN
+ ///
/// ExprList <- (Expr COMMA)* Expr?
fn parseBuiltinCall(p: *Parser) !Node.Index {
const builtin_token = p.assertToken(.builtin);
@@ -3698,7 +3746,7 @@ const Parser = struct {
}
}
- /// KEYWORD_if LPAREN Expr RPAREN PtrPayload? Body (KEYWORD_else Payload? Body)?
+ /// IfPrefix <- KEYWORD_if LPAREN Expr RPAREN PtrPayload?
fn parseIf(p: *Parser, comptime bodyParseFn: fn (p: *Parser) Error!Node.Index) !Node.Index {
const if_token = p.eatToken(.keyword_if) orelse return null_node;
_ = try p.expectToken(.l_paren);
tools/extract-grammar.zig
@@ -0,0 +1,107 @@
+//! Extract the "de facto" Zig Grammar from the parser in lib/std/zig/parse.zig.
+//!
+//! The generated file must still be edited by hand, to remove the normal doc comments that share a doc block with a grammar rule.
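+//!
+//! A possible invocation (hypothetical; run from the repository root):
+//!
+//!     zig run tools/extract-grammar.zig -- lib/std/zig/parse.zig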
+
+const std = @import("std");
+const fs = std.fs;
+const heap = std.heap;
+const io = std.io;
+const mem = std.mem;
+const process = std.process;
+const zig = std.zig;
+
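+/// Fixed-size accumulator for the doc-comment lines that precede a `fn` token.
+/// The 4096-byte capacity is an assumption: no single doc block in parse.zig
+/// comes close to it, and `append` fails with error.BufferOverflow otherwise.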
+const Buffer = struct {
+ const buf_size = 4096;
+
+ buf: [buf_size]u8 = undefined,
+ pos: usize = 0,
+
+ pub fn append(self: *Buffer, src: []const u8) !void {
+ if (self.pos + src.len > buf_size) {
+ return error.BufferOverflow;
+ }
+
+ mem.copy(u8, self.buf[self.pos..buf_size], src);
+ self.pos += src.len;
+ }
+
+ pub fn reset(self: *Buffer) void {
+ self.pos = 0;
+ }
+
+ pub fn slice(self: *Buffer) []const u8 {
+ return self.buf[0..self.pos];
+ }
+};
+
+/// There are many assumptions in the entire codebase that Zig source files can
+/// be byte-indexed with a u32 integer.
+const max_src_size = std.math.maxInt(u32);
+
+var stdout = io.getStdOut().writer();
+
+pub fn main() !void {
+ var arena = heap.ArenaAllocator.init(heap.page_allocator);
+ defer arena.deinit(); // NOTE(mperillo): Can be removed, since the arena lives until process exit anyway.
+
+ const allocator = arena.allocator();
+
+ var args_it = try process.argsWithAllocator(allocator);
+ _ = args_it.skip(); // Skip the program name; the return value is safe to ignore.
+
+ const path = args_it.next() orelse return error.SourceFileRequired;
+ const src = try read(path, allocator);
+
+ var tokenizer = zig.Tokenizer.init(src);
+ var buf: Buffer = Buffer{};
+ while (true) {
+ const token = tokenizer.next();
+ switch (token.tag) {
+ .eof => break,
+ .doc_comment => {
+ const line = blk: {
+ // Strip the "/// " prefix; a bare "///" (len == 3) has no trailing space to skip.
+ const len = token.loc.end - token.loc.start;
+ break :blk if (len == 3) src[token.loc.start + 3 .. token.loc.end] else src[token.loc.start + 4 .. token.loc.end];
+ };
+
+ try buf.append(line);
+ try buf.append("\n");
+ },
+ .keyword_fn => {
+ const doc = buf.slice();
+ buf.reset();
+
+ // Emit only doc blocks that contain a PEG grammar rule (marked
+ // by "<-"); normal doc comments are skipped.
+ if (mem.indexOf(u8, doc, "<-") != null) {
+ // Separate each doc block with an empty line. This ensures the
+ // emitted rules are separated by an empty line as well.
+ try stdout.print("{s}\n", .{doc});
+ }
+ },
+ else => {},
+ }
+ }
+}
+
+fn read(path: []const u8, allocator: mem.Allocator) ![:0]const u8 {
+ var f = try fs.cwd().openFile(path, .{ .mode = .read_only });
+ defer f.close();
+
+ const st = try f.stat();
+ if (st.size > max_src_size) return error.FileTooBig;
+
+ const src = try allocator.allocSentinel(u8, @intCast(usize, st.size), 0);
+ const n = try f.readAll(src);
+ if (n != st.size) return error.UnexpectedEndOfFile;
+
+ return src;
+}
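
For reference, a sketch of what the extractor might emit for one of the doc blocks above (based on the AssignExpr comment in this commit; the "/// " prefixes are stripped and blocks come out separated by blank lines):

    AssignExpr <- Expr (AssignOp Expr)?

    AssignOp
        <- ASTERISKEQUAL
         / ASTERISKPIPEEQUAL
         / SLASHEQUAL
         ...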