Commit a805454dea
Changed files (1)
lib
std
json
lib/std/json/scanner.zig
@@ -897,7 +897,7 @@ pub const Scanner = struct {
},
.number_post_dot => {
if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
- switch (try self.expectByte()) {
+ switch (self.input[self.cursor]) {
'0'...'9' => {
self.cursor += 1;
self.state = .number_frac;
@@ -1032,7 +1032,8 @@ pub const Scanner = struct {
return error.BufferUnderrun;
},
.string_backslash => {
- switch (try self.expectByte()) {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
'"', '\\', '/' => {
// Since these characters now represent themselves literally,
// we can simply begin the next plaintext slice here.
@@ -1080,7 +1081,8 @@ pub const Scanner = struct {
}
},
.string_backslash_u => {
- const c = try self.expectByte();
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
switch (c) {
'0'...'9' => {
self.utf16_code_units[0] = @as(u16, c - '0') << 12;
@@ -1098,7 +1100,8 @@ pub const Scanner = struct {
continue :state_loop;
},
.string_backslash_u_1 => {
- const c = try self.expectByte();
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
switch (c) {
'0'...'9' => {
self.utf16_code_units[0] |= @as(u16, c - '0') << 8;
@@ -1116,7 +1119,8 @@ pub const Scanner = struct {
continue :state_loop;
},
.string_backslash_u_2 => {
- const c = try self.expectByte();
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
switch (c) {
'0'...'9' => {
self.utf16_code_units[0] |= @as(u16, c - '0') << 4;
@@ -1134,7 +1138,8 @@ pub const Scanner = struct {
continue :state_loop;
},
.string_backslash_u_3 => {
- const c = try self.expectByte();
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
switch (c) {
'0'...'9' => {
self.utf16_code_units[0] |= c - '0';
@@ -1160,7 +1165,8 @@ pub const Scanner = struct {
}
},
.string_surrogate_half => {
- switch (try self.expectByte()) {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
'\\' => {
self.cursor += 1;
self.state = .string_surrogate_half_backslash;
@@ -1170,7 +1176,8 @@ pub const Scanner = struct {
}
},
.string_surrogate_half_backslash => {
- switch (try self.expectByte()) {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
'u' => {
self.cursor += 1;
self.state = .string_surrogate_half_backslash_u;
@@ -1180,7 +1187,8 @@ pub const Scanner = struct {
}
},
.string_surrogate_half_backslash_u => {
- switch (try self.expectByte()) {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
'D', 'd' => {
self.cursor += 1;
self.utf16_code_units[1] = 0xD << 12;
@@ -1191,7 +1199,8 @@ pub const Scanner = struct {
}
},
.string_surrogate_half_backslash_u_1 => {
- const c = try self.expectByte();
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
switch (c) {
'C'...'F' => {
self.cursor += 1;
@@ -1209,7 +1218,8 @@ pub const Scanner = struct {
}
},
.string_surrogate_half_backslash_u_2 => {
- const c = try self.expectByte();
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
switch (c) {
'0'...'9' => {
self.cursor += 1;
@@ -1233,7 +1243,8 @@ pub const Scanner = struct {
}
},
.string_surrogate_half_backslash_u_3 => {
- const c = try self.expectByte();
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
switch (c) {
'0'...'9' => {
self.utf16_code_units[1] |= c - '0';
@@ -1254,7 +1265,8 @@ pub const Scanner = struct {
},
.string_utf8_last_byte => {
- switch (try self.expectByte()) {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
0x80...0xBF => {
self.cursor += 1;
self.state = .string;
@@ -1264,7 +1276,8 @@ pub const Scanner = struct {
}
},
.string_utf8_second_to_last_byte => {
- switch (try self.expectByte()) {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
0x80...0xBF => {
self.cursor += 1;
self.state = .string_utf8_last_byte;
@@ -1274,7 +1287,8 @@ pub const Scanner = struct {
}
},
.string_utf8_second_to_last_byte_guard_against_overlong => {
- switch (try self.expectByte()) {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
0xA0...0xBF => {
self.cursor += 1;
self.state = .string_utf8_last_byte;
@@ -1284,7 +1298,8 @@ pub const Scanner = struct {
}
},
.string_utf8_second_to_last_byte_guard_against_surrogate_half => {
- switch (try self.expectByte()) {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
0x80...0x9F => {
self.cursor += 1;
self.state = .string_utf8_last_byte;
@@ -1294,7 +1309,8 @@ pub const Scanner = struct {
}
},
.string_utf8_third_to_last_byte => {
- switch (try self.expectByte()) {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
0x80...0xBF => {
self.cursor += 1;
self.state = .string_utf8_second_to_last_byte;
@@ -1304,7 +1320,8 @@ pub const Scanner = struct {
}
},
.string_utf8_third_to_last_byte_guard_against_overlong => {
- switch (try self.expectByte()) {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
0x90...0xBF => {
self.cursor += 1;
self.state = .string_utf8_second_to_last_byte;
@@ -1314,7 +1331,8 @@ pub const Scanner = struct {
}
},
.string_utf8_third_to_last_byte_guard_against_too_large => {
- switch (try self.expectByte()) {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
0x80...0x8F => {
self.cursor += 1;
self.state = .string_utf8_second_to_last_byte;
@@ -1666,6 +1684,17 @@ pub const Scanner = struct {
self.value_start = self.cursor;
return slice;
}
+ fn takeValueSliceMinusTrailingOffset(self: *@This(), trailing_negative_offset: usize) []const u8 { // Returns input[value_start .. cursor - trailing_negative_offset], or "" when that range would be empty or inverted.
+ // Check if the escape sequence started before the current input buffer.
+ // (The algebra here is awkward to avoid unsigned underflow,
+ // but it's just making sure the slice on the next line isn't UB.)
+ if (self.cursor <= self.value_start + trailing_negative_offset) return ""; // Early out: value_start is left unchanged on this path.
+ const slice = self.input[self.value_start .. self.cursor - trailing_negative_offset]; // Pending bytes since value_start, excluding the last trailing_negative_offset bytes before the cursor.
+ // When trailing_negative_offset is non-zero, setting self.value_start doesn't matter,
+ // because we always set it again while emitting the .partial_string_escaped_*.
+ self.value_start = self.cursor; // Mark everything up to the cursor as consumed.
+ return slice;
+ }
fn endOfBufferInNumber(self: *@This(), allow_end: bool) !Token {
const slice = self.takeValueSlice();
@@ -1678,6 +1707,39 @@ pub const Scanner = struct {
return Token{ .partial_number = slice };
}
+ fn endOfBufferInString(self: *@This()) !Token { // Called when the cursor reaches the end of the input buffer while in a string state; yields a .partial_string token or an error.
+ if (self.is_end_of_input) return error.UnexpectedEndOfInput; // With is_end_of_input set, running out of bytes mid-string is an error rather than a partial token.
+ const slice = self.takeValueSliceMinusTrailingOffset(switch (self.state) { // The switch picks how many bytes before the cursor to leave out of the returned slice.
+ // Don't include the escape sequence in the partial string. NOTE(review): each offset looks like the number of escape bytes already consumed (`\` = 1, `\u` = 2, ..., a full `\uXXXX` = 6, then the second `\uXXXX` of a surrogate pair) — confirm against the state transitions.
+ .string_backslash => 1,
+ .string_backslash_u => 2,
+ .string_backslash_u_1 => 3,
+ .string_backslash_u_2 => 4,
+ .string_backslash_u_3 => 5,
+ .string_surrogate_half => 6,
+ .string_surrogate_half_backslash => 7,
+ .string_surrogate_half_backslash_u => 8,
+ .string_surrogate_half_backslash_u_1 => 9,
+ .string_surrogate_half_backslash_u_2 => 10,
+ .string_surrogate_half_backslash_u_3 => 11,
+
+ // Include everything up to the cursor otherwise (plain string bytes and partially read UTF-8 sequences use offset 0).
+ .string,
+ .string_utf8_last_byte,
+ .string_utf8_second_to_last_byte,
+ .string_utf8_second_to_last_byte_guard_against_overlong,
+ .string_utf8_second_to_last_byte_guard_against_surrogate_half,
+ .string_utf8_third_to_last_byte,
+ .string_utf8_third_to_last_byte_guard_against_overlong,
+ .string_utf8_third_to_last_byte_guard_against_too_large,
+ => 0,
+
+ else => unreachable, // Any state not listed above is asserted to be impossible here, not handled.
+ });
+ if (slice.len == 0) return error.BufferUnderrun; // Nothing to emit: report BufferUnderrun instead of returning an empty partial_string.
+ return Token{ .partial_string = slice };
+ }
+
fn partialStringCodepoint(code_point: u21) Token {
var buf: [4]u8 = undefined;
switch (std.unicode.utf8Encode(code_point, &buf) catch unreachable) {