Commit 302e156523
Changed files (1)
src-self-hosted
src-self-hosted/DepTokenizer.zig
@@ -1,9 +1,7 @@
const Tokenizer = @This();
-arena: std.heap.ArenaAllocator,
index: usize,
bytes: []const u8,
-error_text: []const u8,
state: State,
const std = @import("std");
@@ -12,11 +10,9 @@ const assert = std.debug.assert;
pub fn init(allocator: *std.mem.Allocator, bytes: []const u8) Tokenizer {
return Tokenizer{
- .arena = std.heap.ArenaAllocator.init(allocator),
.index = 0,
.bytes = bytes,
- .error_text = "",
- .state = State{ .lhs = {} },
+ .state = .lhs,
};
}
@@ -24,339 +20,306 @@ pub fn deinit(self: *Tokenizer) void {
self.arena.deinit();
}
-pub fn next(self: *Tokenizer) Error!?Token {
+pub fn next(self: *Tokenizer) ?Token {
+ var start = self.index;
+ var must_resolve = false;
while (self.index < self.bytes.len) {
const char = self.bytes[self.index];
- while (true) {
switch (self.state) {
.lhs => switch (char) {
'\t', '\n', '\r', ' ' => {
// silently ignore whitespace
- break; // advance
+ self.index += 1;
},
else => {
- self.state = State{ .target = try std.ArrayListSentineled(u8, 0).initSize(&self.arena.allocator, 0) };
+ start = self.index;
+ self.state = .target;
},
},
- .target => |*target| switch (char) {
+ .target => switch (char) {
'\t', '\n', '\r', ' ' => {
- return self.errorIllegalChar(self.index, char, "invalid target", .{});
+ return errorIllegalChar(.invalid_target, self.index, char);
},
'$' => {
- self.state = State{ .target_dollar_sign = target.* };
- break; // advance
+ self.state = .target_dollar_sign;
+ self.index += 1;
},
'\\' => {
- self.state = State{ .target_reverse_solidus = target.* };
- break; // advance
+ self.state = .target_reverse_solidus;
+ self.index += 1;
},
':' => {
- self.state = State{ .target_colon = target.* };
- break; // advance
+ self.state = .target_colon;
+ self.index += 1;
},
else => {
- try target.append(char);
- break; // advance
+ self.index += 1;
},
},
- .target_reverse_solidus => |*target| switch (char) {
+ .target_reverse_solidus => switch (char) {
'\t', '\n', '\r' => {
- return self.errorIllegalChar(self.index, char, "bad target escape", .{});
+ return errorIllegalChar(.bad_target_escape, self.index, char);
},
' ', '#', '\\' => {
- try target.append(char);
- self.state = State{ .target = target.* };
- break; // advance
+ must_resolve = true;
+ self.state = .target;
+ self.index += 1;
},
'$' => {
- try target.appendSlice(self.bytes[self.index - 1 .. self.index]);
- self.state = State{ .target_dollar_sign = target.* };
- break; // advance
+ self.state = .target_dollar_sign;
+ self.index += 1;
},
else => {
- try target.appendSlice(self.bytes[self.index - 1 .. self.index + 1]);
- self.state = State{ .target = target.* };
- break; // advance
+ self.state = .target;
+ self.index += 1;
},
},
- .target_dollar_sign => |*target| switch (char) {
+ .target_dollar_sign => switch (char) {
'$' => {
- try target.append(char);
- self.state = State{ .target = target.* };
- break; // advance
+ must_resolve = true;
+ self.state = .target;
+ self.index += 1;
},
else => {
- return self.errorIllegalChar(self.index, char, "expecting '$'", .{});
+ return errorIllegalChar(.expected_dollar_sign, self.index, char);
},
},
- .target_colon => |*target| switch (char) {
+ .target_colon => switch (char) {
'\n', '\r' => {
- const bytes = target.span();
+ const bytes = self.bytes[start..self.index - 1];
if (bytes.len != 0) {
- self.state = State{ .lhs = {} };
- return Token{ .id = .target, .bytes = bytes };
+ self.state = .lhs;
+ return finishTarget(must_resolve, bytes);
}
// silently ignore null target
- self.state = State{ .lhs = {} };
- continue;
+ self.state = .lhs;
},
'\\' => {
- self.state = State{ .target_colon_reverse_solidus = target.* };
- break; // advance
+ self.state = .target_colon_reverse_solidus;
+ self.index += 1;
},
else => {
- const bytes = target.span();
+ const bytes = self.bytes[start..self.index - 1];
if (bytes.len != 0) {
- self.state = State{ .rhs = {} };
- return Token{ .id = .target, .bytes = bytes };
+ self.state = .rhs;
+ return finishTarget(must_resolve, bytes);
}
// silently ignore null target
- self.state = State{ .lhs = {} };
- continue;
+ self.state = .lhs;
},
},
- .target_colon_reverse_solidus => |*target| switch (char) {
+ .target_colon_reverse_solidus => switch (char) {
'\n', '\r' => {
- const bytes = target.span();
+ const bytes = self.bytes[start .. self.index - 2];
if (bytes.len != 0) {
- self.state = State{ .lhs = {} };
- return Token{ .id = .target, .bytes = bytes };
+ self.state = .lhs;
+ return finishTarget(must_resolve, bytes);
}
// silently ignore null target
- self.state = State{ .lhs = {} };
- continue;
+ self.state = .lhs;
},
else => {
- try target.appendSlice(self.bytes[self.index - 2 .. self.index + 1]);
- self.state = State{ .target = target.* };
- break;
+ self.state = .target;
},
},
.rhs => switch (char) {
'\t', ' ' => {
// silently ignore horizontal whitespace
- break; // advance
+ self.index += 1;
},
'\n', '\r' => {
- self.state = State{ .lhs = {} };
- continue;
+ self.state = .lhs;
},
'\\' => {
- self.state = State{ .rhs_continuation = {} };
- break; // advance
+ self.state = .rhs_continuation;
+ self.index += 1;
},
'"' => {
- self.state = State{ .prereq_quote = try std.ArrayListSentineled(u8, 0).initSize(&self.arena.allocator, 0) };
- break; // advance
+ self.state = .prereq_quote;
+ self.index += 1;
+ start = self.index;
},
else => {
- self.state = State{ .prereq = try std.ArrayListSentineled(u8, 0).initSize(&self.arena.allocator, 0) };
+ start = self.index;
+ self.state = .prereq;
},
},
.rhs_continuation => switch (char) {
'\n' => {
- self.state = State{ .rhs = {} };
- break; // advance
+ self.state = .rhs;
+ self.index += 1;
},
'\r' => {
- self.state = State{ .rhs_continuation_linefeed = {} };
- break; // advance
+ self.state = .rhs_continuation_linefeed;
+ self.index += 1;
},
else => {
- return self.errorIllegalChar(self.index, char, "continuation expecting end-of-line", .{});
+ return errorIllegalChar(.continuation_eol, self.index, char);
},
},
.rhs_continuation_linefeed => switch (char) {
'\n' => {
- self.state = State{ .rhs = {} };
- break; // advance
+ self.state = .rhs;
+ self.index += 1;
},
else => {
- return self.errorIllegalChar(self.index, char, "continuation expecting end-of-line", .{});
+ return errorIllegalChar(.continuation_eol, self.index, char);
},
},
- .prereq_quote => |*prereq| switch (char) {
+ .prereq_quote => switch (char) {
'"' => {
- const bytes = prereq.span();
self.index += 1;
- self.state = State{ .rhs = {} };
- return Token{ .id = .prereq, .bytes = bytes };
+ self.state = .rhs;
+ return Token{ .prereq = self.bytes[start .. self.index - 1] };
},
else => {
- try prereq.append(char);
- break; // advance
+ self.index += 1;
},
},
- .prereq => |*prereq| switch (char) {
+ .prereq => switch (char) {
'\t', ' ' => {
- const bytes = prereq.span();
- self.state = State{ .rhs = {} };
- return Token{ .id = .prereq, .bytes = bytes };
+ self.state = .rhs;
+ return Token{ .prereq = self.bytes[start..self.index] };
},
'\n', '\r' => {
- const bytes = prereq.span();
- self.state = State{ .lhs = {} };
- return Token{ .id = .prereq, .bytes = bytes };
+ self.state = .lhs;
+ return Token{ .prereq = self.bytes[start..self.index] };
},
'\\' => {
- self.state = State{ .prereq_continuation = prereq.* };
- break; // advance
+ self.state = .prereq_continuation;
+ self.index += 1;
},
else => {
- try prereq.append(char);
- break; // advance
+ self.index += 1;
},
},
- .prereq_continuation => |*prereq| switch (char) {
+ .prereq_continuation => switch (char) {
'\n' => {
- const bytes = prereq.span();
self.index += 1;
- self.state = State{ .rhs = {} };
- return Token{ .id = .prereq, .bytes = bytes };
+ self.state = .rhs;
+ return Token{ .prereq = self.bytes[start .. self.index - 2] };
},
'\r' => {
- self.state = State{ .prereq_continuation_linefeed = prereq.* };
- break; // advance
+ self.state = .prereq_continuation_linefeed;
+ self.index += 1;
},
else => {
// not continuation
- try prereq.appendSlice(self.bytes[self.index - 1 .. self.index + 1]);
- self.state = State{ .prereq = prereq.* };
- break; // advance
+ self.state = .prereq;
+ self.index += 1;
},
},
- .prereq_continuation_linefeed => |prereq| switch (char) {
+ .prereq_continuation_linefeed => switch (char) {
'\n' => {
- const bytes = prereq.span();
self.index += 1;
- self.state = State{ .rhs = {} };
- return Token{ .id = .prereq, .bytes = bytes };
+ self.state = .rhs;
+ return Token{ .prereq = self.bytes[start .. self.index - 1] };
},
else => {
- return self.errorIllegalChar(self.index, char, "continuation expecting end-of-line", .{});
+ return errorIllegalChar(.continuation_eol, self.index, char);
},
},
}
- }
- self.index += 1;
- }
-
- // eof, handle maybe incomplete token
- if (self.index == 0) return null;
- const idx = self.index - 1;
+ } else {
switch (self.state) {
.lhs,
.rhs,
.rhs_continuation,
.rhs_continuation_linefeed,
- => {},
- .target => |target| {
- return self.errorPosition(idx, target.span(), "incomplete target", .{});
+ => return null,
+ .target => {
+ return Token{ .incomplete_target = self.bytes[start..] };
},
.target_reverse_solidus,
.target_dollar_sign,
=> {
- const index = self.index - 1;
- return self.errorIllegalChar(idx, self.bytes[idx], "incomplete escape", .{});
+ const idx = self.index - 1;
+ return errorIllegalChar(.incomplete_escape, idx, self.bytes[idx]);
},
- .target_colon => |target| {
- const bytes = target.span();
+ .target_colon => {
+ const bytes = self.bytes[start.. self.index - 1];
if (bytes.len != 0) {
self.index += 1;
- self.state = State{ .rhs = {} };
- return Token{ .id = .target, .bytes = bytes };
+ self.state = .rhs;
+ return finishTarget(must_resolve, bytes);
}
// silently ignore null target
- self.state = State{ .lhs = {} };
+ self.state = .lhs;
+ return null;
},
- .target_colon_reverse_solidus => |target| {
- const bytes = target.span();
+ .target_colon_reverse_solidus => {
+ const bytes = self.bytes[start..self.index - 2];
if (bytes.len != 0) {
self.index += 1;
- self.state = State{ .rhs = {} };
- return Token{ .id = .target, .bytes = bytes };
+ self.state = .rhs;
+ return finishTarget(must_resolve, bytes);
}
// silently ignore null target
- self.state = State{ .lhs = {} };
+ self.state = .lhs;
+ return null;
},
- .prereq_quote => |prereq| {
- return self.errorPosition(idx, prereq.span(), "incomplete quoted prerequisite", .{});
+ .prereq_quote => {
+ return Token{ .incomplete_quoted_prerequisite = self.bytes[start..] };
},
- .prereq => |prereq| {
- const bytes = prereq.span();
- self.state = State{ .lhs = {} };
- return Token{ .id = .prereq, .bytes = bytes };
+ .prereq => {
+ self.state = .lhs;
+ return Token{ .prereq = self.bytes[start..] };
},
- .prereq_continuation => |prereq| {
- const bytes = prereq.span();
- self.state = State{ .lhs = {} };
- return Token{ .id = .prereq, .bytes = bytes };
+ .prereq_continuation => {
+ self.state = .lhs;
+ return Token{ .prereq = self.bytes[start.. self.index - 1] };
},
- .prereq_continuation_linefeed => |prereq| {
- const bytes = prereq.span();
- self.state = State{ .lhs = {} };
- return Token{ .id = .prereq, .bytes = bytes };
+ .prereq_continuation_linefeed => {
+ self.state = .lhs;
+ return Token{ .prereq = self.bytes[start.. self.index - 2] };
},
}
- return null;
+}
+ unreachable;
}
-fn errorf(self: *Tokenizer, comptime fmt: []const u8, args: anytype) Error {
- self.error_text = try std.fmt.allocPrintZ(&self.arena.allocator, fmt, args);
- return Error.InvalidInput;
+fn errorIllegalChar(comptime id: @TagType(Token), index: usize, char: u8) Token {
+ return @unionInit(Token, @tagName(id), .{ .index = index, .char = char });
}
-fn errorPosition(self: *Tokenizer, position: usize, bytes: []const u8, comptime fmt: []const u8, args: anytype) Error {
- var buffer = std.ArrayList(u8).init(&self.arena.allocator);
- try buffer.outStream().print(fmt, args);
- try buffer.appendSlice(" '");
- const out = buffer.writer();
- try printCharValues(out, bytes);
- try buffer.appendSlice("'");
- try buffer.outStream().print(" at position {}", .{position - (bytes.len - 1)});
- try buffer.append(0);
- self.error_text = buffer.items[0 .. buffer.items.len - 1 :0];
- return Error.InvalidInput;
-}
-
-fn errorIllegalChar(self: *Tokenizer, position: usize, char: u8, comptime fmt: []const u8, args: anytype) Error {
- var buffer = try std.ArrayListSentineled(u8, 0).initSize(&self.arena.allocator, 0);
- try buffer.appendSlice("illegal char ");
- try printUnderstandableChar(&buffer, char);
- try buffer.outStream().print(" at position {}", .{position});
- if (fmt.len != 0) try buffer.outStream().print(": " ++ fmt, args);
- self.error_text = buffer.span();
- return Error.InvalidInput;
-}
-
-const Error = error{
- OutOfMemory,
- InvalidInput,
-};
+fn finishTarget(must_resolve: bool, bytes: []const u8) Token {
+ return if (must_resolve)
+ .{ .target_must_resolve = bytes }
+ else
+ .{ .target = bytes };
+}
-const State = union(enum) {
- lhs: void,
- target: std.ArrayListSentineled(u8, 0),
- target_reverse_solidus: std.ArrayListSentineled(u8, 0),
- target_dollar_sign: std.ArrayListSentineled(u8, 0),
- target_colon: std.ArrayListSentineled(u8, 0),
- target_colon_reverse_solidus: std.ArrayListSentineled(u8, 0),
- rhs: void,
- rhs_continuation: void,
- rhs_continuation_linefeed: void,
- prereq_quote: std.ArrayListSentineled(u8, 0),
- prereq: std.ArrayListSentineled(u8, 0),
- prereq_continuation: std.ArrayListSentineled(u8, 0),
- prereq_continuation_linefeed: std.ArrayListSentineled(u8, 0),
+const State = enum {
+ lhs,
+ target,
+ target_reverse_solidus,
+ target_dollar_sign,
+ target_colon,
+ target_colon_reverse_solidus,
+ rhs,
+ rhs_continuation,
+ rhs_continuation_linefeed,
+ prereq_quote,
+ prereq,
+ prereq_continuation,
+ prereq_continuation_linefeed,
};
-pub const Token = struct {
- id: ID,
- bytes: []const u8,
-
- pub const ID = enum {
- target,
- prereq,
+pub const Token = union(enum) {
+ target: []const u8,
+ target_must_resolve: []const u8,
+ prereq: []const u8,
+ incomplete_quoted_prerequisite: []const u8,
+ incomplete_target: []const u8,
+ invalid_target: IndexAndChar,
+ bad_target_escape: IndexAndChar,
+ expected_dollar_sign: IndexAndChar,
+ continuation_eol: IndexAndChar,
+ incomplete_escape: IndexAndChar,
+
+ pub const IndexAndChar = struct {
+ index: usize,
+ char: u8,
};
};
@@ -845,26 +808,24 @@ fn depTokenizer(input: []const u8, expect: []const u8) !void {
var it = Tokenizer.init(arena, input);
var buffer = try std.ArrayListSentineled(u8, 0).initSize(arena, 0);
var i: usize = 0;
- while (true) {
- const r = it.next() catch |err| {
- switch (err) {
- Tokenizer.Error.InvalidInput => {
- if (i != 0) try buffer.appendSlice("\n");
- try buffer.appendSlice("ERROR: ");
- try buffer.appendSlice(it.error_text);
- },
- else => return err,
- }
- break;
- };
- const token = r orelse break;
+ while (it.next()) |token| {
if (i != 0) try buffer.appendSlice("\n");
- try buffer.appendSlice(@tagName(token.id));
+ switch (token) {
+ .target, .prereq => |bytes| {
+ try buffer.appendSlice(@tagName(token));
try buffer.appendSlice(" = {");
- for (token.bytes) |b| {
+ for (bytes) |b| {
try buffer.append(printable_char_tab[b]);
}
try buffer.appendSlice("}");
+ },
+ .target_must_resolve => {
+ @panic("TODO");
+ },
+ else => {
+ @panic("TODO");
+ },
+ }
i += 1;
}
const got: []const u8 = buffer.span();