Commit 018b743c7a
Changed files (20)
lib
lib/std/json/dynamic.zig
@@ -0,0 +1,344 @@
+const std = @import("std");
+const debug = std.debug;
+const ArenaAllocator = std.heap.ArenaAllocator;
+const ArrayList = std.ArrayList;
+const StringArrayHashMap = std.StringArrayHashMap;
+const Allocator = std.mem.Allocator;
+
+const StringifyOptions = @import("./stringify.zig").StringifyOptions;
+const stringify = @import("./stringify.zig").stringify;
+
+const JsonScanner = @import("./scanner.zig").Scanner;
+const AllocWhen = @import("./scanner.zig").AllocWhen;
+const Token = @import("./scanner.zig").Token;
+const isNumberFormattedLikeAnInteger = @import("./scanner.zig").isNumberFormattedLikeAnInteger;
+
+pub const ValueTree = struct {
+ arena: *ArenaAllocator,
+ root: Value,
+
+ pub fn deinit(self: *ValueTree) void {
+ self.arena.deinit();
+ self.arena.child_allocator.destroy(self.arena);
+ }
+};
+
+pub const ObjectMap = StringArrayHashMap(Value);
+pub const Array = ArrayList(Value);
+
+/// Represents a JSON value
+/// Currently only supports numbers that fit into i64 or f64.
+pub const Value = union(enum) {
+ null,
+ bool: bool,
+ integer: i64,
+ float: f64,
+ number_string: []const u8,
+ string: []const u8,
+ array: Array,
+ object: ObjectMap,
+
+ pub fn jsonStringify(
+ value: @This(),
+ options: StringifyOptions,
+ out_stream: anytype,
+ ) @TypeOf(out_stream).Error!void {
+ switch (value) {
+ .null => try stringify(null, options, out_stream),
+ .bool => |inner| try stringify(inner, options, out_stream),
+ .integer => |inner| try stringify(inner, options, out_stream),
+ .float => |inner| try stringify(inner, options, out_stream),
+ .number_string => |inner| try out_stream.writeAll(inner),
+ .string => |inner| try stringify(inner, options, out_stream),
+ .array => |inner| try stringify(inner.items, options, out_stream),
+ .object => |inner| {
+ try out_stream.writeByte('{');
+ var field_output = false;
+ var child_options = options;
+ child_options.whitespace.indent_level += 1;
+ var it = inner.iterator();
+ while (it.next()) |entry| {
+ if (!field_output) {
+ field_output = true;
+ } else {
+ try out_stream.writeByte(',');
+ }
+ try child_options.whitespace.outputIndent(out_stream);
+
+ try stringify(entry.key_ptr.*, options, out_stream);
+ try out_stream.writeByte(':');
+ if (child_options.whitespace.separator) {
+ try out_stream.writeByte(' ');
+ }
+ try stringify(entry.value_ptr.*, child_options, out_stream);
+ }
+ if (field_output) {
+ try options.whitespace.outputIndent(out_stream);
+ }
+ try out_stream.writeByte('}');
+ },
+ }
+ }
+
+ pub fn dump(self: Value) void {
+ std.debug.getStderrMutex().lock();
+ defer std.debug.getStderrMutex().unlock();
+
+ const stderr = std.io.getStdErr().writer();
+ stringify(self, .{}, stderr) catch return;
+ }
+};
+
+/// A non-stream JSON parser which constructs a tree of `Value`s.
+pub const Parser = struct {
+ allocator: Allocator,
+ state: State,
+ alloc_when: AllocWhen,
+ // Stores parent nodes and un-combined Values.
+ stack: Array,
+
+ const State = enum {
+ object_key,
+ object_value,
+ array_value,
+ simple,
+ };
+
+ pub fn init(allocator: Allocator, alloc_when: AllocWhen) Parser {
+ return Parser{
+ .allocator = allocator,
+ .state = .simple,
+ .alloc_when = alloc_when,
+ .stack = Array.init(allocator),
+ };
+ }
+
+ pub fn deinit(p: *Parser) void {
+ p.stack.deinit();
+ }
+
+ pub fn reset(p: *Parser) void {
+ p.state = .simple;
+ p.stack.shrinkRetainingCapacity(0);
+ }
+
+ pub fn parse(p: *Parser, input: []const u8) !ValueTree {
+ var scanner = JsonScanner.initCompleteInput(p.allocator, input);
+ defer scanner.deinit();
+
+ var arena = try p.allocator.create(ArenaAllocator);
+ errdefer p.allocator.destroy(arena);
+
+ arena.* = ArenaAllocator.init(p.allocator);
+ errdefer arena.deinit();
+
+ const allocator = arena.allocator();
+
+ while (true) {
+ const token = try scanner.nextAlloc(allocator, p.alloc_when);
+ if (token == .end_of_document) break;
+ try p.transition(allocator, token);
+ }
+
+ debug.assert(p.stack.items.len == 1);
+
+ return ValueTree{
+ .arena = arena,
+ .root = p.stack.items[0],
+ };
+ }
+
+ // Even though p.allocator exists, we take an explicit allocator so that allocation state
+    // can be cleaned up correctly when an error occurs during a call to `parse`.
+ fn transition(p: *Parser, allocator: Allocator, token: Token) !void {
+ switch (p.state) {
+ .object_key => switch (token) {
+ .object_end => {
+ if (p.stack.items.len == 1) {
+ return;
+ }
+
+ var value = p.stack.pop();
+ try p.pushToParent(&value);
+ },
+ .string => |s| {
+ try p.stack.append(Value{ .string = s });
+ p.state = .object_value;
+ },
+ .allocated_string => |s| {
+ try p.stack.append(Value{ .string = s });
+ p.state = .object_value;
+ },
+ else => unreachable,
+ },
+ .object_value => {
+ var object = &p.stack.items[p.stack.items.len - 2].object;
+ var key = p.stack.items[p.stack.items.len - 1].string;
+
+ switch (token) {
+ .object_begin => {
+ try p.stack.append(Value{ .object = ObjectMap.init(allocator) });
+ p.state = .object_key;
+ },
+ .array_begin => {
+ try p.stack.append(Value{ .array = Array.init(allocator) });
+ p.state = .array_value;
+ },
+ .string => |s| {
+ try object.put(key, Value{ .string = s });
+ _ = p.stack.pop();
+ p.state = .object_key;
+ },
+ .allocated_string => |s| {
+ try object.put(key, Value{ .string = s });
+ _ = p.stack.pop();
+ p.state = .object_key;
+ },
+ .number => |slice| {
+ try object.put(key, try p.parseNumber(slice));
+ _ = p.stack.pop();
+ p.state = .object_key;
+ },
+ .allocated_number => |slice| {
+ try object.put(key, try p.parseNumber(slice));
+ _ = p.stack.pop();
+ p.state = .object_key;
+ },
+ .true => {
+ try object.put(key, Value{ .bool = true });
+ _ = p.stack.pop();
+ p.state = .object_key;
+ },
+ .false => {
+ try object.put(key, Value{ .bool = false });
+ _ = p.stack.pop();
+ p.state = .object_key;
+ },
+ .null => {
+ try object.put(key, .null);
+ _ = p.stack.pop();
+ p.state = .object_key;
+ },
+ .object_end, .array_end, .end_of_document => unreachable,
+ .partial_number, .partial_string, .partial_string_escaped_1, .partial_string_escaped_2, .partial_string_escaped_3, .partial_string_escaped_4 => unreachable,
+ }
+ },
+ .array_value => {
+ var array = &p.stack.items[p.stack.items.len - 1].array;
+
+ switch (token) {
+ .array_end => {
+ if (p.stack.items.len == 1) {
+ return;
+ }
+
+ var value = p.stack.pop();
+ try p.pushToParent(&value);
+ },
+ .object_begin => {
+ try p.stack.append(Value{ .object = ObjectMap.init(allocator) });
+ p.state = .object_key;
+ },
+ .array_begin => {
+ try p.stack.append(Value{ .array = Array.init(allocator) });
+ p.state = .array_value;
+ },
+ .string => |s| {
+ try array.append(Value{ .string = s });
+ },
+ .allocated_string => |s| {
+ try array.append(Value{ .string = s });
+ },
+ .number => |slice| {
+ try array.append(try p.parseNumber(slice));
+ },
+ .allocated_number => |slice| {
+ try array.append(try p.parseNumber(slice));
+ },
+ .true => {
+ try array.append(Value{ .bool = true });
+ },
+ .false => {
+ try array.append(Value{ .bool = false });
+ },
+ .null => {
+ try array.append(.null);
+ },
+ .object_end, .end_of_document => unreachable,
+ .partial_number, .partial_string, .partial_string_escaped_1, .partial_string_escaped_2, .partial_string_escaped_3, .partial_string_escaped_4 => unreachable,
+ }
+ },
+ .simple => switch (token) {
+ .object_begin => {
+ try p.stack.append(Value{ .object = ObjectMap.init(allocator) });
+ p.state = .object_key;
+ },
+ .array_begin => {
+ try p.stack.append(Value{ .array = Array.init(allocator) });
+ p.state = .array_value;
+ },
+ .string => |s| {
+ try p.stack.append(Value{ .string = s });
+ },
+ .allocated_string => |s| {
+ try p.stack.append(Value{ .string = s });
+ },
+ .number => |slice| {
+ try p.stack.append(try p.parseNumber(slice));
+ },
+ .allocated_number => |slice| {
+ try p.stack.append(try p.parseNumber(slice));
+ },
+ .true => {
+ try p.stack.append(Value{ .bool = true });
+ },
+ .false => {
+ try p.stack.append(Value{ .bool = false });
+ },
+ .null => {
+ try p.stack.append(.null);
+ },
+ .object_end, .array_end, .end_of_document => unreachable,
+ .partial_number, .partial_string, .partial_string_escaped_1, .partial_string_escaped_2, .partial_string_escaped_3, .partial_string_escaped_4 => unreachable,
+ },
+ }
+ }
+
+ fn pushToParent(p: *Parser, value: *const Value) !void {
+ switch (p.stack.items[p.stack.items.len - 1]) {
+ // Object Parent -> [ ..., object, <key>, value ]
+ .string => |key| {
+ _ = p.stack.pop();
+
+ var object = &p.stack.items[p.stack.items.len - 1].object;
+ try object.put(key, value.*);
+ p.state = .object_key;
+ },
+ // Array Parent -> [ ..., <array>, value ]
+ .array => |*array| {
+ try array.append(value.*);
+ p.state = .array_value;
+ },
+ else => {
+ unreachable;
+ },
+ }
+ }
+
+ fn parseNumber(p: *Parser, slice: []const u8) !Value {
+ _ = p;
+ return if (isNumberFormattedLikeAnInteger(slice))
+ Value{
+ .integer = std.fmt.parseInt(i64, slice, 10) catch |e| switch (e) {
+ error.Overflow => return Value{ .number_string = slice },
+ error.InvalidCharacter => |err| return err,
+ },
+ }
+ else
+ Value{ .float = try std.fmt.parseFloat(f64, slice) };
+ }
+};
+
+test {
+ _ = @import("dynamic_test.zig");
+}
lib/std/json/dynamic_test.zig
@@ -0,0 +1,285 @@
+const std = @import("std");
+const mem = std.mem;
+const testing = std.testing;
+
+const ObjectMap = @import("dynamic.zig").ObjectMap;
+const Array = @import("dynamic.zig").Array;
+const Value = @import("dynamic.zig").Value;
+const Parser = @import("dynamic.zig").Parser;
+
+test "json.parser.dynamic" {
+ var p = Parser.init(testing.allocator, .alloc_if_needed);
+ defer p.deinit();
+
+ const s =
+ \\{
+ \\ "Image": {
+ \\ "Width": 800,
+ \\ "Height": 600,
+ \\ "Title": "View from 15th Floor",
+ \\ "Thumbnail": {
+ \\ "Url": "http://www.example.com/image/481989943",
+ \\ "Height": 125,
+ \\ "Width": 100
+ \\ },
+ \\ "Animated" : false,
+ \\ "IDs": [116, 943, 234, 38793],
+ \\ "ArrayOfObject": [{"n": "m"}],
+ \\ "double": 1.3412,
+ \\ "LargeInt": 18446744073709551615
+ \\ }
+ \\}
+ ;
+
+ var tree = try p.parse(s);
+ defer tree.deinit();
+
+ var root = tree.root;
+
+ var image = root.object.get("Image").?;
+
+ const width = image.object.get("Width").?;
+ try testing.expect(width.integer == 800);
+
+ const height = image.object.get("Height").?;
+ try testing.expect(height.integer == 600);
+
+ const title = image.object.get("Title").?;
+ try testing.expect(mem.eql(u8, title.string, "View from 15th Floor"));
+
+ const animated = image.object.get("Animated").?;
+ try testing.expect(animated.bool == false);
+
+ const array_of_object = image.object.get("ArrayOfObject").?;
+ try testing.expect(array_of_object.array.items.len == 1);
+
+ const obj0 = array_of_object.array.items[0].object.get("n").?;
+ try testing.expect(mem.eql(u8, obj0.string, "m"));
+
+ const double = image.object.get("double").?;
+ try testing.expect(double.float == 1.3412);
+
+ const large_int = image.object.get("LargeInt").?;
+ try testing.expect(mem.eql(u8, large_int.number_string, "18446744073709551615"));
+}
+
+const writeStream = @import("./write_stream.zig").writeStream;
+test "write json then parse it" {
+ var out_buffer: [1000]u8 = undefined;
+
+ var fixed_buffer_stream = std.io.fixedBufferStream(&out_buffer);
+ const out_stream = fixed_buffer_stream.writer();
+ var jw = writeStream(out_stream, 4);
+
+ try jw.beginObject();
+
+ try jw.objectField("f");
+ try jw.emitBool(false);
+
+ try jw.objectField("t");
+ try jw.emitBool(true);
+
+ try jw.objectField("int");
+ try jw.emitNumber(1234);
+
+ try jw.objectField("array");
+ try jw.beginArray();
+
+ try jw.arrayElem();
+ try jw.emitNull();
+
+ try jw.arrayElem();
+ try jw.emitNumber(12.34);
+
+ try jw.endArray();
+
+ try jw.objectField("str");
+ try jw.emitString("hello");
+
+ try jw.endObject();
+
+ var parser = Parser.init(testing.allocator, .alloc_if_needed);
+ defer parser.deinit();
+ var tree = try parser.parse(fixed_buffer_stream.getWritten());
+ defer tree.deinit();
+
+ try testing.expect(tree.root.object.get("f").?.bool == false);
+ try testing.expect(tree.root.object.get("t").?.bool == true);
+ try testing.expect(tree.root.object.get("int").?.integer == 1234);
+ try testing.expect(tree.root.object.get("array").?.array.items[0].null == {});
+ try testing.expect(tree.root.object.get("array").?.array.items[1].float == 12.34);
+ try testing.expect(mem.eql(u8, tree.root.object.get("str").?.string, "hello"));
+}
+
+fn testParse(arena_allocator: std.mem.Allocator, json_str: []const u8) !Value {
+ var p = Parser.init(arena_allocator, .alloc_if_needed);
+ return (try p.parse(json_str)).root;
+}
+
+test "parsing empty string gives appropriate error" {
+ var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer arena_allocator.deinit();
+ try testing.expectError(error.UnexpectedEndOfInput, testParse(arena_allocator.allocator(), ""));
+}
+
+test "parse tree should not contain dangling pointers" {
+ var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer arena_allocator.deinit();
+
+ var p = Parser.init(arena_allocator.allocator(), .alloc_if_needed);
+ defer p.deinit();
+
+ var tree = try p.parse("[]");
+ defer tree.deinit();
+
+ // Allocation should succeed
+ var i: usize = 0;
+ while (i < 100) : (i += 1) {
+ try tree.root.array.append(Value{ .integer = 100 });
+ }
+ try testing.expectEqual(tree.root.array.items.len, 100);
+}
+
+test "integer after float has proper type" {
+ var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer arena_allocator.deinit();
+ const parsed = try testParse(arena_allocator.allocator(),
+ \\{
+ \\ "float": 3.14,
+ \\ "ints": [1, 2, 3]
+ \\}
+ );
+ try std.testing.expect(parsed.object.get("ints").?.array.items[0] == .integer);
+}
+
+test "escaped characters" {
+ var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer arena_allocator.deinit();
+ const input =
+ \\{
+ \\ "backslash": "\\",
+ \\ "forwardslash": "\/",
+ \\ "newline": "\n",
+ \\ "carriagereturn": "\r",
+ \\ "tab": "\t",
+ \\ "formfeed": "\f",
+ \\ "backspace": "\b",
+ \\ "doublequote": "\"",
+ \\ "unicode": "\u0105",
+ \\ "surrogatepair": "\ud83d\ude02"
+ \\}
+ ;
+
+ const obj = (try testParse(arena_allocator.allocator(), input)).object;
+
+ try testing.expectEqualSlices(u8, obj.get("backslash").?.string, "\\");
+ try testing.expectEqualSlices(u8, obj.get("forwardslash").?.string, "/");
+ try testing.expectEqualSlices(u8, obj.get("newline").?.string, "\n");
+ try testing.expectEqualSlices(u8, obj.get("carriagereturn").?.string, "\r");
+ try testing.expectEqualSlices(u8, obj.get("tab").?.string, "\t");
+ try testing.expectEqualSlices(u8, obj.get("formfeed").?.string, "\x0C");
+ try testing.expectEqualSlices(u8, obj.get("backspace").?.string, "\x08");
+ try testing.expectEqualSlices(u8, obj.get("doublequote").?.string, "\"");
+ try testing.expectEqualSlices(u8, obj.get("unicode").?.string, "ą");
+ try testing.expectEqualSlices(u8, obj.get("surrogatepair").?.string, "😂");
+}
+
+test "string copy option" {
+ const input =
+ \\{
+ \\ "noescape": "aą😂",
+ \\ "simple": "\\\/\n\r\t\f\b\"",
+ \\ "unicode": "\u0105",
+ \\ "surrogatepair": "\ud83d\ude02"
+ \\}
+ ;
+
+ var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer arena_allocator.deinit();
+ const allocator = arena_allocator.allocator();
+
+ var parser = Parser.init(allocator, .alloc_if_needed);
+ const tree_nocopy = try parser.parse(input);
+ const obj_nocopy = tree_nocopy.root.object;
+
+ parser = Parser.init(allocator, .alloc_always);
+ const tree_copy = try parser.parse(input);
+ const obj_copy = tree_copy.root.object;
+
+ for ([_][]const u8{ "noescape", "simple", "unicode", "surrogatepair" }) |field_name| {
+ try testing.expectEqualSlices(u8, obj_nocopy.get(field_name).?.string, obj_copy.get(field_name).?.string);
+ }
+
+ const nocopy_addr = &obj_nocopy.get("noescape").?.string[0];
+ const copy_addr = &obj_copy.get("noescape").?.string[0];
+
+ var found_nocopy = false;
+ for (input, 0..) |_, index| {
+ try testing.expect(copy_addr != &input[index]);
+ if (nocopy_addr == &input[index]) {
+ found_nocopy = true;
+ }
+ }
+ try testing.expect(found_nocopy);
+}
+
+test "Value.jsonStringify" {
+ {
+ var buffer: [10]u8 = undefined;
+ var fbs = std.io.fixedBufferStream(&buffer);
+ try @as(Value, .null).jsonStringify(.{}, fbs.writer());
+ try testing.expectEqualSlices(u8, fbs.getWritten(), "null");
+ }
+ {
+ var buffer: [10]u8 = undefined;
+ var fbs = std.io.fixedBufferStream(&buffer);
+ try (Value{ .bool = true }).jsonStringify(.{}, fbs.writer());
+ try testing.expectEqualSlices(u8, fbs.getWritten(), "true");
+ }
+ {
+ var buffer: [10]u8 = undefined;
+ var fbs = std.io.fixedBufferStream(&buffer);
+ try (Value{ .integer = 42 }).jsonStringify(.{}, fbs.writer());
+ try testing.expectEqualSlices(u8, fbs.getWritten(), "42");
+ }
+ {
+ var buffer: [10]u8 = undefined;
+ var fbs = std.io.fixedBufferStream(&buffer);
+ try (Value{ .number_string = "43" }).jsonStringify(.{}, fbs.writer());
+ try testing.expectEqualSlices(u8, fbs.getWritten(), "43");
+ }
+ {
+ var buffer: [10]u8 = undefined;
+ var fbs = std.io.fixedBufferStream(&buffer);
+ try (Value{ .float = 42 }).jsonStringify(.{}, fbs.writer());
+ try testing.expectEqualSlices(u8, fbs.getWritten(), "4.2e+01");
+ }
+ {
+ var buffer: [10]u8 = undefined;
+ var fbs = std.io.fixedBufferStream(&buffer);
+ try (Value{ .string = "weeee" }).jsonStringify(.{}, fbs.writer());
+ try testing.expectEqualSlices(u8, fbs.getWritten(), "\"weeee\"");
+ }
+ {
+ var buffer: [10]u8 = undefined;
+ var fbs = std.io.fixedBufferStream(&buffer);
+ var vals = [_]Value{
+ .{ .integer = 1 },
+ .{ .integer = 2 },
+ .{ .number_string = "3" },
+ };
+ try (Value{
+ .array = Array.fromOwnedSlice(undefined, &vals),
+ }).jsonStringify(.{}, fbs.writer());
+ try testing.expectEqualSlices(u8, fbs.getWritten(), "[1,2,3]");
+ }
+ {
+ var buffer: [10]u8 = undefined;
+ var fbs = std.io.fixedBufferStream(&buffer);
+ var obj = ObjectMap.init(testing.allocator);
+ defer obj.deinit();
+ try obj.putNoClobber("a", .{ .string = "b" });
+ try (Value{ .object = obj }).jsonStringify(.{}, fbs.writer());
+ try testing.expectEqualSlices(u8, fbs.getWritten(), "{\"a\":\"b\"}");
+ }
+}
lib/std/json/JSONTestSuite_test.zig
@@ -0,0 +1,960 @@
+// This file was generated by _generate_JSONTestSuite.zig
+// These test cases are sourced from: https://github.com/nst/JSONTestSuite
+const ok = @import("./test.zig").ok;
+const err = @import("./test.zig").err;
+const any = @import("./test.zig").any;
+
+test "i_number_double_huge_neg_exp.json" {
+ try any("[123.456e-789]");
+}
+test "i_number_huge_exp.json" {
+ try any("[0.4e00669999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999969999999006]");
+}
+test "i_number_neg_int_huge_exp.json" {
+ try any("[-1e+9999]");
+}
+test "i_number_pos_double_huge_exp.json" {
+ try any("[1.5e+9999]");
+}
+test "i_number_real_neg_overflow.json" {
+ try any("[-123123e100000]");
+}
+test "i_number_real_pos_overflow.json" {
+ try any("[123123e100000]");
+}
+test "i_number_real_underflow.json" {
+ try any("[123e-10000000]");
+}
+test "i_number_too_big_neg_int.json" {
+ try any("[-123123123123123123123123123123]");
+}
+test "i_number_too_big_pos_int.json" {
+ try any("[100000000000000000000]");
+}
+test "i_number_very_big_negative_int.json" {
+ try any("[-237462374673276894279832749832423479823246327846]");
+}
+test "i_object_key_lone_2nd_surrogate.json" {
+ try any("{\"\\uDFAA\":0}");
+}
+test "i_string_1st_surrogate_but_2nd_missing.json" {
+ try any("[\"\\uDADA\"]");
+}
+test "i_string_1st_valid_surrogate_2nd_invalid.json" {
+ try any("[\"\\uD888\\u1234\"]");
+}
+test "i_string_UTF-16LE_with_BOM.json" {
+ try any("\xff\xfe[\x00\"\x00\xe9\x00\"\x00]\x00");
+}
+test "i_string_UTF-8_invalid_sequence.json" {
+ try any("[\"\xe6\x97\xa5\xd1\x88\xfa\"]");
+}
+test "i_string_UTF8_surrogate_U+D800.json" {
+ try any("[\"\xed\xa0\x80\"]");
+}
+test "i_string_incomplete_surrogate_and_escape_valid.json" {
+ try any("[\"\\uD800\\n\"]");
+}
+test "i_string_incomplete_surrogate_pair.json" {
+ try any("[\"\\uDd1ea\"]");
+}
+test "i_string_incomplete_surrogates_escape_valid.json" {
+ try any("[\"\\uD800\\uD800\\n\"]");
+}
+test "i_string_invalid_lonely_surrogate.json" {
+ try any("[\"\\ud800\"]");
+}
+test "i_string_invalid_surrogate.json" {
+ try any("[\"\\ud800abc\"]");
+}
+test "i_string_invalid_utf-8.json" {
+ try any("[\"\xff\"]");
+}
+test "i_string_inverted_surrogates_U+1D11E.json" {
+ try any("[\"\\uDd1e\\uD834\"]");
+}
+test "i_string_iso_latin_1.json" {
+ try any("[\"\xe9\"]");
+}
+test "i_string_lone_second_surrogate.json" {
+ try any("[\"\\uDFAA\"]");
+}
+test "i_string_lone_utf8_continuation_byte.json" {
+ try any("[\"\x81\"]");
+}
+test "i_string_not_in_unicode_range.json" {
+ try any("[\"\xf4\xbf\xbf\xbf\"]");
+}
+test "i_string_overlong_sequence_2_bytes.json" {
+ try any("[\"\xc0\xaf\"]");
+}
+test "i_string_overlong_sequence_6_bytes.json" {
+ try any("[\"\xfc\x83\xbf\xbf\xbf\xbf\"]");
+}
+test "i_string_overlong_sequence_6_bytes_null.json" {
+ try any("[\"\xfc\x80\x80\x80\x80\x80\"]");
+}
+test "i_string_truncated-utf-8.json" {
+ try any("[\"\xe0\xff\"]");
+}
+test "i_string_utf16BE_no_BOM.json" {
+ try any("\x00[\x00\"\x00\xe9\x00\"\x00]");
+}
+test "i_string_utf16LE_no_BOM.json" {
+ try any("[\x00\"\x00\xe9\x00\"\x00]\x00");
+}
+test "i_structure_500_nested_arrays.json" {
+ try any("[" ** 500 ++ "]" ** 500);
+}
+test "i_structure_UTF-8_BOM_empty_object.json" {
+ try any("\xef\xbb\xbf{}");
+}
+test "n_array_1_true_without_comma.json" {
+ try err("[1 true]");
+}
+test "n_array_a_invalid_utf8.json" {
+ try err("[a\xe5]");
+}
+test "n_array_colon_instead_of_comma.json" {
+ try err("[\"\": 1]");
+}
+test "n_array_comma_after_close.json" {
+ try err("[\"\"],");
+}
+test "n_array_comma_and_number.json" {
+ try err("[,1]");
+}
+test "n_array_double_comma.json" {
+ try err("[1,,2]");
+}
+test "n_array_double_extra_comma.json" {
+ try err("[\"x\",,]");
+}
+test "n_array_extra_close.json" {
+ try err("[\"x\"]]");
+}
+test "n_array_extra_comma.json" {
+ try err("[\"\",]");
+}
+test "n_array_incomplete.json" {
+ try err("[\"x\"");
+}
+test "n_array_incomplete_invalid_value.json" {
+ try err("[x");
+}
+test "n_array_inner_array_no_comma.json" {
+ try err("[3[4]]");
+}
+test "n_array_invalid_utf8.json" {
+ try err("[\xff]");
+}
+test "n_array_items_separated_by_semicolon.json" {
+ try err("[1:2]");
+}
+test "n_array_just_comma.json" {
+ try err("[,]");
+}
+test "n_array_just_minus.json" {
+ try err("[-]");
+}
+test "n_array_missing_value.json" {
+ try err("[ , \"\"]");
+}
+test "n_array_newlines_unclosed.json" {
+ try err("[\"a\",\n4\n,1,");
+}
+test "n_array_number_and_comma.json" {
+ try err("[1,]");
+}
+test "n_array_number_and_several_commas.json" {
+ try err("[1,,]");
+}
+test "n_array_spaces_vertical_tab_formfeed.json" {
+ try err("[\"\x0ba\"\\f]");
+}
+test "n_array_star_inside.json" {
+ try err("[*]");
+}
+test "n_array_unclosed.json" {
+ try err("[\"\"");
+}
+test "n_array_unclosed_trailing_comma.json" {
+ try err("[1,");
+}
+test "n_array_unclosed_with_new_lines.json" {
+ try err("[1,\n1\n,1");
+}
+test "n_array_unclosed_with_object_inside.json" {
+ try err("[{}");
+}
+test "n_incomplete_false.json" {
+ try err("[fals]");
+}
+test "n_incomplete_null.json" {
+ try err("[nul]");
+}
+test "n_incomplete_true.json" {
+ try err("[tru]");
+}
+test "n_multidigit_number_then_00.json" {
+ try err("123\x00");
+}
+test "n_number_++.json" {
+ try err("[++1234]");
+}
+test "n_number_+1.json" {
+ try err("[+1]");
+}
+test "n_number_+Inf.json" {
+ try err("[+Inf]");
+}
+test "n_number_-01.json" {
+ try err("[-01]");
+}
+test "n_number_-1.0..json" {
+ try err("[-1.0.]");
+}
+test "n_number_-2..json" {
+ try err("[-2.]");
+}
+test "n_number_-NaN.json" {
+ try err("[-NaN]");
+}
+test "n_number_.-1.json" {
+ try err("[.-1]");
+}
+test "n_number_.2e-3.json" {
+ try err("[.2e-3]");
+}
+test "n_number_0.1.2.json" {
+ try err("[0.1.2]");
+}
+test "n_number_0.3e+.json" {
+ try err("[0.3e+]");
+}
+test "n_number_0.3e.json" {
+ try err("[0.3e]");
+}
+test "n_number_0.e1.json" {
+ try err("[0.e1]");
+}
+test "n_number_0_capital_E+.json" {
+ try err("[0E+]");
+}
+test "n_number_0_capital_E.json" {
+ try err("[0E]");
+}
+test "n_number_0e+.json" {
+ try err("[0e+]");
+}
+test "n_number_0e.json" {
+ try err("[0e]");
+}
+test "n_number_1.0e+.json" {
+ try err("[1.0e+]");
+}
+test "n_number_1.0e-.json" {
+ try err("[1.0e-]");
+}
+test "n_number_1.0e.json" {
+ try err("[1.0e]");
+}
+test "n_number_1_000.json" {
+ try err("[1 000.0]");
+}
+test "n_number_1eE2.json" {
+ try err("[1eE2]");
+}
+test "n_number_2.e+3.json" {
+ try err("[2.e+3]");
+}
+test "n_number_2.e-3.json" {
+ try err("[2.e-3]");
+}
+test "n_number_2.e3.json" {
+ try err("[2.e3]");
+}
+test "n_number_9.e+.json" {
+ try err("[9.e+]");
+}
+test "n_number_Inf.json" {
+ try err("[Inf]");
+}
+test "n_number_NaN.json" {
+ try err("[NaN]");
+}
+test "n_number_U+FF11_fullwidth_digit_one.json" {
+ try err("[\xef\xbc\x91]");
+}
+test "n_number_expression.json" {
+ try err("[1+2]");
+}
+test "n_number_hex_1_digit.json" {
+ try err("[0x1]");
+}
+test "n_number_hex_2_digits.json" {
+ try err("[0x42]");
+}
+test "n_number_infinity.json" {
+ try err("[Infinity]");
+}
+test "n_number_invalid+-.json" {
+ try err("[0e+-1]");
+}
+test "n_number_invalid-negative-real.json" {
+ try err("[-123.123foo]");
+}
+test "n_number_invalid-utf-8-in-bigger-int.json" {
+ try err("[123\xe5]");
+}
+test "n_number_invalid-utf-8-in-exponent.json" {
+ try err("[1e1\xe5]");
+}
+test "n_number_invalid-utf-8-in-int.json" {
+ try err("[0\xe5]\n");
+}
+test "n_number_minus_infinity.json" {
+ try err("[-Infinity]");
+}
+test "n_number_minus_sign_with_trailing_garbage.json" {
+ try err("[-foo]");
+}
+test "n_number_minus_space_1.json" {
+ try err("[- 1]");
+}
+test "n_number_neg_int_starting_with_zero.json" {
+ try err("[-012]");
+}
+test "n_number_neg_real_without_int_part.json" {
+ try err("[-.123]");
+}
+test "n_number_neg_with_garbage_at_end.json" {
+ try err("[-1x]");
+}
+test "n_number_real_garbage_after_e.json" {
+ try err("[1ea]");
+}
+test "n_number_real_with_invalid_utf8_after_e.json" {
+ try err("[1e\xe5]");
+}
+test "n_number_real_without_fractional_part.json" {
+ try err("[1.]");
+}
+test "n_number_starting_with_dot.json" {
+ try err("[.123]");
+}
+test "n_number_with_alpha.json" {
+ try err("[1.2a-3]");
+}
+test "n_number_with_alpha_char.json" {
+ try err("[1.8011670033376514H-308]");
+}
+test "n_number_with_leading_zero.json" {
+ try err("[012]");
+}
+test "n_object_bad_value.json" {
+ try err("[\"x\", truth]");
+}
+test "n_object_bracket_key.json" {
+ try err("{[: \"x\"}\n");
+}
+test "n_object_comma_instead_of_colon.json" {
+ try err("{\"x\", null}");
+}
+test "n_object_double_colon.json" {
+ try err("{\"x\"::\"b\"}");
+}
+test "n_object_emoji.json" {
+ try err("{\xf0\x9f\x87\xa8\xf0\x9f\x87\xad}");
+}
+test "n_object_garbage_at_end.json" {
+ try err("{\"a\":\"a\" 123}");
+}
+test "n_object_key_with_single_quotes.json" {
+ try err("{key: 'value'}");
+}
+test "n_object_lone_continuation_byte_in_key_and_trailing_comma.json" {
+ try err("{\"\xb9\":\"0\",}");
+}
+test "n_object_missing_colon.json" {
+ try err("{\"a\" b}");
+}
+test "n_object_missing_key.json" {
+ try err("{:\"b\"}");
+}
+test "n_object_missing_semicolon.json" {
+ try err("{\"a\" \"b\"}");
+}
+test "n_object_missing_value.json" {
+ try err("{\"a\":");
+}
+test "n_object_no-colon.json" {
+ try err("{\"a\"");
+}
+test "n_object_non_string_key.json" {
+ try err("{1:1}");
+}
+test "n_object_non_string_key_but_huge_number_instead.json" {
+ try err("{9999E9999:1}");
+}
+test "n_object_repeated_null_null.json" {
+ try err("{null:null,null:null}");
+}
+test "n_object_several_trailing_commas.json" {
+ try err("{\"id\":0,,,,,}");
+}
+test "n_object_single_quote.json" {
+ try err("{'a':0}");
+}
+test "n_object_trailing_comma.json" {
+ try err("{\"id\":0,}");
+}
+test "n_object_trailing_comment.json" {
+ try err("{\"a\":\"b\"}/**/");
+}
+test "n_object_trailing_comment_open.json" {
+ try err("{\"a\":\"b\"}/**//");
+}
+test "n_object_trailing_comment_slash_open.json" {
+ try err("{\"a\":\"b\"}//");
+}
+test "n_object_trailing_comment_slash_open_incomplete.json" {
+ try err("{\"a\":\"b\"}/");
+}
+test "n_object_two_commas_in_a_row.json" {
+ try err("{\"a\":\"b\",,\"c\":\"d\"}");
+}
+test "n_object_unquoted_key.json" {
+ try err("{a: \"b\"}");
+}
+test "n_object_unterminated-value.json" {
+ try err("{\"a\":\"a");
+}
+test "n_object_with_single_string.json" {
+ try err("{ \"foo\" : \"bar\", \"a\" }");
+}
+test "n_object_with_trailing_garbage.json" {
+ try err("{\"a\":\"b\"}#");
+}
+test "n_single_space.json" {
+ try err(" ");
+}
+test "n_string_1_surrogate_then_escape.json" {
+ try err("[\"\\uD800\\\"]");
+}
+test "n_string_1_surrogate_then_escape_u.json" {
+ try err("[\"\\uD800\\u\"]");
+}
+test "n_string_1_surrogate_then_escape_u1.json" {
+ try err("[\"\\uD800\\u1\"]");
+}
+test "n_string_1_surrogate_then_escape_u1x.json" {
+ try err("[\"\\uD800\\u1x\"]");
+}
+test "n_string_accentuated_char_no_quotes.json" {
+ try err("[\xc3\xa9]");
+}
+test "n_string_backslash_00.json" {
+ try err("[\"\\\x00\"]");
+}
+test "n_string_escape_x.json" {
+ try err("[\"\\x00\"]");
+}
+test "n_string_escaped_backslash_bad.json" {
+ try err("[\"\\\\\\\"]");
+}
+test "n_string_escaped_ctrl_char_tab.json" {
+ try err("[\"\\\x09\"]");
+}
+test "n_string_escaped_emoji.json" {
+ try err("[\"\\\xf0\x9f\x8c\x80\"]");
+}
+test "n_string_incomplete_escape.json" {
+ try err("[\"\\\"]");
+}
+test "n_string_incomplete_escaped_character.json" {
+ try err("[\"\\u00A\"]");
+}
+test "n_string_incomplete_surrogate.json" {
+ try err("[\"\\uD834\\uDd\"]");
+}
+test "n_string_incomplete_surrogate_escape_invalid.json" {
+ try err("[\"\\uD800\\uD800\\x\"]");
+}
+test "n_string_invalid-utf-8-in-escape.json" {
+ try err("[\"\\u\xe5\"]");
+}
+test "n_string_invalid_backslash_esc.json" {
+ try err("[\"\\a\"]");
+}
+test "n_string_invalid_unicode_escape.json" {
+ try err("[\"\\uqqqq\"]");
+}
+test "n_string_invalid_utf8_after_escape.json" {
+ try err("[\"\\\xe5\"]");
+}
+test "n_string_leading_uescaped_thinspace.json" {
+ try err("[\\u0020\"asd\"]");
+}
+test "n_string_no_quotes_with_bad_escape.json" {
+ try err("[\\n]");
+}
+test "n_string_single_doublequote.json" {
+ try err("\"");
+}
+test "n_string_single_quote.json" {
+ try err("['single quote']");
+}
+test "n_string_single_string_no_double_quotes.json" {
+ try err("abc");
+}
+test "n_string_start_escape_unclosed.json" {
+ try err("[\"\\");
+}
+test "n_string_unescaped_ctrl_char.json" {
+ try err("[\"a\x00a\"]");
+}
+test "n_string_unescaped_newline.json" {
+ try err("[\"new\nline\"]");
+}
+test "n_string_unescaped_tab.json" {
+ try err("[\"\x09\"]");
+}
+test "n_string_unicode_CapitalU.json" {
+ try err("\"\\UA66D\"");
+}
+test "n_string_with_trailing_garbage.json" {
+ try err("\"\"x");
+}
+test "n_structure_100000_opening_arrays.json" {
+ try err("[" ** 100000);
+}
+test "n_structure_U+2060_word_joined.json" {
+ try err("[\xe2\x81\xa0]");
+}
+test "n_structure_UTF8_BOM_no_data.json" {
+ try err("\xef\xbb\xbf");
+}
+test "n_structure_angle_bracket_..json" {
+ try err("<.>");
+}
+test "n_structure_angle_bracket_null.json" {
+ try err("[<null>]");
+}
+test "n_structure_array_trailing_garbage.json" {
+ try err("[1]x");
+}
+test "n_structure_array_with_extra_array_close.json" {
+ try err("[1]]");
+}
+test "n_structure_array_with_unclosed_string.json" {
+ try err("[\"asd]");
+}
+test "n_structure_ascii-unicode-identifier.json" {
+ try err("a\xc3\xa5");
+}
+test "n_structure_capitalized_True.json" {
+ try err("[True]");
+}
+test "n_structure_close_unopened_array.json" {
+ try err("1]");
+}
+test "n_structure_comma_instead_of_closing_brace.json" {
+ try err("{\"x\": true,");
+}
+test "n_structure_double_array.json" {
+ try err("[][]");
+}
+test "n_structure_end_array.json" {
+ try err("]");
+}
+test "n_structure_incomplete_UTF8_BOM.json" {
+ try err("\xef\xbb{}");
+}
+test "n_structure_lone-invalid-utf-8.json" {
+ try err("\xe5");
+}
+test "n_structure_lone-open-bracket.json" {
+ try err("[");
+}
+test "n_structure_no_data.json" {
+ try err("");
+}
+test "n_structure_null-byte-outside-string.json" {
+ try err("[\x00]");
+}
+test "n_structure_number_with_trailing_garbage.json" {
+ try err("2@");
+}
+test "n_structure_object_followed_by_closing_object.json" {
+ try err("{}}");
+}
+test "n_structure_object_unclosed_no_value.json" {
+ try err("{\"\":");
+}
+test "n_structure_object_with_comment.json" {
+ try err("{\"a\":/*comment*/\"b\"}");
+}
+test "n_structure_object_with_trailing_garbage.json" {
+ try err("{\"a\": true} \"x\"");
+}
+test "n_structure_open_array_apostrophe.json" {
+ try err("['");
+}
+test "n_structure_open_array_comma.json" {
+ try err("[,");
+}
+test "n_structure_open_array_object.json" {
+ try err("[{\"\":" ** 50000 ++ "\n");
+}
+test "n_structure_open_array_open_object.json" {
+ try err("[{");
+}
+test "n_structure_open_array_open_string.json" {
+ try err("[\"a");
+}
+test "n_structure_open_array_string.json" {
+ try err("[\"a\"");
+}
+test "n_structure_open_object.json" {
+ try err("{");
+}
+test "n_structure_open_object_close_array.json" {
+ try err("{]");
+}
+test "n_structure_open_object_comma.json" {
+ try err("{,");
+}
+test "n_structure_open_object_open_array.json" {
+ try err("{[");
+}
+test "n_structure_open_object_open_string.json" {
+ try err("{\"a");
+}
+test "n_structure_open_object_string_with_apostrophes.json" {
+ try err("{'a'");
+}
+test "n_structure_open_open.json" {
+ try err("[\"\\{[\"\\{[\"\\{[\"\\{");
+}
+test "n_structure_single_eacute.json" {
+ try err("\xe9");
+}
+test "n_structure_single_star.json" {
+ try err("*");
+}
+test "n_structure_trailing_#.json" {
+ try err("{\"a\":\"b\"}#{}");
+}
+test "n_structure_uescaped_LF_before_string.json" {
+ try err("[\\u000A\"\"]");
+}
+test "n_structure_unclosed_array.json" {
+ try err("[1");
+}
+test "n_structure_unclosed_array_partial_null.json" {
+ try err("[ false, nul");
+}
+test "n_structure_unclosed_array_unfinished_false.json" {
+ try err("[ true, fals");
+}
+test "n_structure_unclosed_array_unfinished_true.json" {
+ try err("[ false, tru");
+}
+test "n_structure_unclosed_object.json" {
+ try err("{\"asd\":\"asd\"");
+}
+test "n_structure_unicode-identifier.json" {
+ try err("\xc3\xa5");
+}
+test "n_structure_whitespace_U+2060_word_joiner.json" {
+ try err("[\xe2\x81\xa0]");
+}
+test "n_structure_whitespace_formfeed.json" {
+ try err("[\x0c]");
+}
+test "y_array_arraysWithSpaces.json" {
+ try ok("[[] ]");
+}
+test "y_array_empty-string.json" {
+ try ok("[\"\"]");
+}
+test "y_array_empty.json" {
+ try ok("[]");
+}
+test "y_array_ending_with_newline.json" {
+ try ok("[\"a\"]");
+}
+test "y_array_false.json" {
+ try ok("[false]");
+}
+test "y_array_heterogeneous.json" {
+ try ok("[null, 1, \"1\", {}]");
+}
+test "y_array_null.json" {
+ try ok("[null]");
+}
+test "y_array_with_1_and_newline.json" {
+ try ok("[1\n]");
+}
+test "y_array_with_leading_space.json" {
+ try ok(" [1]");
+}
+test "y_array_with_several_null.json" {
+ try ok("[1,null,null,null,2]");
+}
+test "y_array_with_trailing_space.json" {
+ try ok("[2] ");
+}
+test "y_number.json" {
+ try ok("[123e65]");
+}
+test "y_number_0e+1.json" {
+ try ok("[0e+1]");
+}
+test "y_number_0e1.json" {
+ try ok("[0e1]");
+}
+test "y_number_after_space.json" {
+ try ok("[ 4]");
+}
+test "y_number_double_close_to_zero.json" {
+ try ok("[-0.000000000000000000000000000000000000000000000000000000000000000000000000000001]\n");
+}
+test "y_number_int_with_exp.json" {
+ try ok("[20e1]");
+}
+test "y_number_minus_zero.json" {
+ try ok("[-0]");
+}
+test "y_number_negative_int.json" {
+ try ok("[-123]");
+}
+test "y_number_negative_one.json" {
+ try ok("[-1]");
+}
+test "y_number_negative_zero.json" {
+ try ok("[-0]");
+}
+test "y_number_real_capital_e.json" {
+ try ok("[1E22]");
+}
+test "y_number_real_capital_e_neg_exp.json" {
+ try ok("[1E-2]");
+}
+test "y_number_real_capital_e_pos_exp.json" {
+ try ok("[1E+2]");
+}
+test "y_number_real_exponent.json" {
+ try ok("[123e45]");
+}
+test "y_number_real_fraction_exponent.json" {
+ try ok("[123.456e78]");
+}
+test "y_number_real_neg_exp.json" {
+ try ok("[1e-2]");
+}
+test "y_number_real_pos_exponent.json" {
+ try ok("[1e+2]");
+}
+test "y_number_simple_int.json" {
+ try ok("[123]");
+}
+test "y_number_simple_real.json" {
+ try ok("[123.456789]");
+}
+test "y_object.json" {
+ try ok("{\"asd\":\"sdf\", \"dfg\":\"fgh\"}");
+}
+test "y_object_basic.json" {
+ try ok("{\"asd\":\"sdf\"}");
+}
+test "y_object_duplicated_key.json" {
+ try ok("{\"a\":\"b\",\"a\":\"c\"}");
+}
+test "y_object_duplicated_key_and_value.json" {
+ try ok("{\"a\":\"b\",\"a\":\"b\"}");
+}
+test "y_object_empty.json" {
+ try ok("{}");
+}
+test "y_object_empty_key.json" {
+ try ok("{\"\":0}");
+}
+test "y_object_escaped_null_in_key.json" {
+ try ok("{\"foo\\u0000bar\": 42}");
+}
+test "y_object_extreme_numbers.json" {
+ try ok("{ \"min\": -1.0e+28, \"max\": 1.0e+28 }");
+}
+test "y_object_long_strings.json" {
+ try ok("{\"x\":[{\"id\": \"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"}], \"id\": \"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"}");
+}
+test "y_object_simple.json" {
+ try ok("{\"a\":[]}");
+}
+test "y_object_string_unicode.json" {
+ try ok("{\"title\":\"\\u041f\\u043e\\u043b\\u0442\\u043e\\u0440\\u0430 \\u0417\\u0435\\u043c\\u043b\\u0435\\u043a\\u043e\\u043f\\u0430\" }");
+}
+test "y_object_with_newlines.json" {
+ try ok("{\n\"a\": \"b\"\n}");
+}
+test "y_string_1_2_3_bytes_UTF-8_sequences.json" {
+ try ok("[\"\\u0060\\u012a\\u12AB\"]");
+}
+test "y_string_accepted_surrogate_pair.json" {
+ try ok("[\"\\uD801\\udc37\"]");
+}
+test "y_string_accepted_surrogate_pairs.json" {
+ try ok("[\"\\ud83d\\ude39\\ud83d\\udc8d\"]");
+}
+test "y_string_allowed_escapes.json" {
+ try ok("[\"\\\"\\\\\\/\\b\\f\\n\\r\\t\"]");
+}
+test "y_string_backslash_and_u_escaped_zero.json" {
+ try ok("[\"\\\\u0000\"]");
+}
+test "y_string_backslash_doublequotes.json" {
+ try ok("[\"\\\"\"]");
+}
+test "y_string_comments.json" {
+ try ok("[\"a/*b*/c/*d//e\"]");
+}
+test "y_string_double_escape_a.json" {
+ try ok("[\"\\\\a\"]");
+}
+test "y_string_double_escape_n.json" {
+ try ok("[\"\\\\n\"]");
+}
+test "y_string_escaped_control_character.json" {
+ try ok("[\"\\u0012\"]");
+}
+test "y_string_escaped_noncharacter.json" {
+ try ok("[\"\\uFFFF\"]");
+}
+test "y_string_in_array.json" {
+ try ok("[\"asd\"]");
+}
+test "y_string_in_array_with_leading_space.json" {
+ try ok("[ \"asd\"]");
+}
+test "y_string_last_surrogates_1_and_2.json" {
+ try ok("[\"\\uDBFF\\uDFFF\"]");
+}
+test "y_string_nbsp_uescaped.json" {
+ try ok("[\"new\\u00A0line\"]");
+}
+test "y_string_nonCharacterInUTF-8_U+10FFFF.json" {
+ try ok("[\"\xf4\x8f\xbf\xbf\"]");
+}
+test "y_string_nonCharacterInUTF-8_U+FFFF.json" {
+ try ok("[\"\xef\xbf\xbf\"]");
+}
+test "y_string_null_escape.json" {
+ try ok("[\"\\u0000\"]");
+}
+test "y_string_one-byte-utf-8.json" {
+ try ok("[\"\\u002c\"]");
+}
+test "y_string_pi.json" {
+ try ok("[\"\xcf\x80\"]");
+}
+test "y_string_reservedCharacterInUTF-8_U+1BFFF.json" {
+ try ok("[\"\xf0\x9b\xbf\xbf\"]");
+}
+test "y_string_simple_ascii.json" {
+ try ok("[\"asd \"]");
+}
+test "y_string_space.json" {
+ try ok("\" \"");
+}
+test "y_string_surrogates_U+1D11E_MUSICAL_SYMBOL_G_CLEF.json" {
+ try ok("[\"\\uD834\\uDd1e\"]");
+}
+test "y_string_three-byte-utf-8.json" {
+ try ok("[\"\\u0821\"]");
+}
+test "y_string_two-byte-utf-8.json" {
+ try ok("[\"\\u0123\"]");
+}
+test "y_string_u+2028_line_sep.json" {
+ try ok("[\"\xe2\x80\xa8\"]");
+}
+test "y_string_u+2029_par_sep.json" {
+ try ok("[\"\xe2\x80\xa9\"]");
+}
+test "y_string_uEscape.json" {
+ try ok("[\"\\u0061\\u30af\\u30EA\\u30b9\"]");
+}
+test "y_string_uescaped_newline.json" {
+ try ok("[\"new\\u000Aline\"]");
+}
+test "y_string_unescaped_char_delete.json" {
+ try ok("[\"\x7f\"]");
+}
+test "y_string_unicode.json" {
+ try ok("[\"\\uA66D\"]");
+}
+test "y_string_unicodeEscapedBackslash.json" {
+ try ok("[\"\\u005C\"]");
+}
+test "y_string_unicode_2.json" {
+ try ok("[\"\xe2\x8d\x82\xe3\x88\xb4\xe2\x8d\x82\"]");
+}
+test "y_string_unicode_U+10FFFE_nonchar.json" {
+ try ok("[\"\\uDBFF\\uDFFE\"]");
+}
+test "y_string_unicode_U+1FFFE_nonchar.json" {
+ try ok("[\"\\uD83F\\uDFFE\"]");
+}
+test "y_string_unicode_U+200B_ZERO_WIDTH_SPACE.json" {
+ try ok("[\"\\u200B\"]");
+}
+test "y_string_unicode_U+2064_invisible_plus.json" {
+ try ok("[\"\\u2064\"]");
+}
+test "y_string_unicode_U+FDD0_nonchar.json" {
+ try ok("[\"\\uFDD0\"]");
+}
+test "y_string_unicode_U+FFFE_nonchar.json" {
+ try ok("[\"\\uFFFE\"]");
+}
+test "y_string_unicode_escaped_double_quote.json" {
+ try ok("[\"\\u0022\"]");
+}
+test "y_string_utf8.json" {
+ try ok("[\"\xe2\x82\xac\xf0\x9d\x84\x9e\"]");
+}
+test "y_string_with_del_character.json" {
+ try ok("[\"a\x7fa\"]");
+}
+test "y_structure_lonely_false.json" {
+ try ok("false");
+}
+test "y_structure_lonely_int.json" {
+ try ok("42");
+}
+test "y_structure_lonely_negative_real.json" {
+ try ok("-0.1");
+}
+test "y_structure_lonely_null.json" {
+ try ok("null");
+}
+test "y_structure_lonely_string.json" {
+ try ok("\"asd\"");
+}
+test "y_structure_lonely_true.json" {
+ try ok("true");
+}
+test "y_structure_string_empty.json" {
+ try ok("\"\"");
+}
+test "y_structure_trailing_newline.json" {
+ try ok("[\"a\"]\n");
+}
+test "y_structure_true_in_array.json" {
+ try ok("[true]");
+}
+test "y_structure_whitespace_array.json" {
+ try ok(" [] ");
+}
lib/std/json/scanner.zig
@@ -0,0 +1,1764 @@
+// Notes on standards compliance: https://datatracker.ietf.org/doc/html/rfc8259
+// * RFC 8259 requires JSON documents be valid UTF-8,
+// but makes an allowance for systems that are "part of a closed ecosystem".
+// I have no idea what that's supposed to mean in the context of a standard specification.
+// This implementation requires inputs to be valid UTF-8.
+// * RFC 8259 contradicts itself regarding whether lowercase is allowed in \u hex digits,
+// but this is probably a bug in the spec, and it's clear that lowercase is meant to be allowed.
+// (RFC 5234 defines HEXDIG to only allow uppercase.)
+// * When RFC 8259 refers to a "character", I assume they really mean a "Unicode scalar value".
+// See http://www.unicode.org/glossary/#unicode_scalar_value .
+// * RFC 8259 doesn't explicitly disallow unpaired surrogate halves in \u escape sequences,
+// but vaguely implies that \u escapes are for encoding Unicode "characters" (i.e. Unicode scalar values?),
+// which would mean that unpaired surrogate halves are forbidden.
+// By contrast ECMA-404 (a competing(/compatible?) JSON standard, which JavaScript's JSON.parse() conforms to)
+// explicitly allows unpaired surrogate halves.
+// This implementation forbids unpaired surrogate halves in \u sequences.
+// If a high surrogate half appears in a \u sequence,
+// then a low surrogate half must immediately follow in \u notation.
+// * RFC 8259 allows implementations to "accept non-JSON forms or extensions".
+// This implementation does not accept any of that.
+// * RFC 8259 allows implementations to put limits on "the size of texts",
+// "the maximum depth of nesting", "the range and precision of numbers",
+// and "the length and character contents of strings".
+// This low-level implementation does not limit these,
+// except where noted above, and except that nesting depth requires memory allocation.
+// Note that this low-level API does not interpret numbers numerically,
+// but simply emits their source form for some higher level code to make sense of.
+// * This low-level implementation allows duplicate object keys,
+// and key/value pairs are emitted in the order they appear in the input.
+
+const std = @import("std");
+
+const Allocator = std.mem.Allocator;
+const ArrayList = std.ArrayList;
+const assert = std.debug.assert;
+
+/// Scan the input and check for malformed JSON.
+/// On `SyntaxError` or `UnexpectedEndOfInput`, returns `false`.
+/// Returns any errors from the allocator as-is, which is unlikely,
+/// but can be caused by extreme nesting depth in the input.
+pub fn validate(allocator: Allocator, s: []const u8) Allocator.Error!bool {
+ var scanner = Scanner.initCompleteInput(allocator, s);
+ defer scanner.deinit();
+
+ while (true) {
+ const token = scanner.next() catch |err| switch (err) {
+ error.SyntaxError, error.UnexpectedEndOfInput => return false,
+ error.OutOfMemory => return error.OutOfMemory,
+ error.BufferUnderrun => unreachable,
+ };
+ if (token == .end_of_document) break;
+ }
+
+ return true;
+}
+
+/// The parsing errors are divided into two categories:
+/// * `SyntaxError` is for clearly malformed JSON documents,
+/// such as giving an input document that isn't JSON at all.
+/// * `UnexpectedEndOfInput` is for signaling that everything's been
+/// valid so far, but the input appears to be truncated for some reason.
+/// Note that a completely empty (or whitespace-only) input will give `UnexpectedEndOfInput`.
+pub const Error = error{ SyntaxError, UnexpectedEndOfInput };
+
+/// Calls `std.json.Reader` with `std.json.default_buffer_size`.
+pub fn reader(allocator: Allocator, io_reader: anytype) Reader(default_buffer_size, @TypeOf(io_reader)) {
+ return Reader(default_buffer_size, @TypeOf(io_reader)).init(allocator, io_reader);
+}
+/// Used by `json.reader`.
+pub const default_buffer_size = 0x1000;
+
+/// The tokens emitted by `std.json.Scanner` and `std.json.Reader` `.next*()` functions follow this grammar:
+/// ```
+/// <document> = <value> .end_of_document
+/// <value> =
+/// | <object>
+/// | <array>
+/// | <number>
+/// | <string>
+/// | .true
+/// | .false
+/// | .null
+/// <object> = .object_begin ( <string> <value> )* .object_end
+/// <array> = .array_begin ( <value> )* .array_end
+/// <number> = <It depends. See below.>
+/// <string> = <It depends. See below.>
+/// ```
+///
+/// What you get for `<number>` and `<string>` values depends on which `next*()` method you call:
+///
+/// ```
+/// next():
+/// <number> = ( .partial_number )* .number
+/// <string> = ( <partial_string> )* .string
+/// <partial_string> =
+/// | .partial_string
+/// | .partial_string_escaped_1
+/// | .partial_string_escaped_2
+/// | .partial_string_escaped_3
+/// | .partial_string_escaped_4
+///
+/// nextAlloc*(..., .alloc_always):
+/// <number> = .allocated_number
+/// <string> = .allocated_string
+///
+/// nextAlloc*(..., .alloc_if_needed):
+/// <number> =
+/// | .number
+/// | .allocated_number
+/// <string> =
+/// | .string
+/// | .allocated_string
+/// ```
+///
+/// For all tokens with a `[]const u8`, `[]u8`, or `[n]u8` payload, the payload represents the content of the value.
+/// For number values, this is the representation of the number exactly as it appears in the input.
+/// For strings, this is the content of the string after resolving escape sequences.
+///
+/// For `.allocated_number` and `.allocated_string`, the `[]u8` payloads are allocations made with the given allocator.
+/// You are responsible for managing that memory. `json.Reader.deinit()` does *not* free those allocations.
+///
+/// The `.partial_*` tokens indicate that a value spans multiple input buffers or that a string contains escape sequences.
+/// To get a complete value in memory, you need to concatenate the values yourself.
+/// Calling `nextAlloc*()` does this for you, and returns an `.allocated_*` token with the result.
+///
+/// For tokens with a `[]const u8` payload, the payload is a slice into the current input buffer.
+/// The memory may become undefined during the next call to `json.Scanner.feedInput()`
+/// or any `json.Reader` method whose return error set includes `json.Error`.
+/// To keep the value persistently, it is recommended to make a copy or to use `.alloc_always`,
+/// which makes a copy for you.
+///
+/// Note that `.number` and `.string` tokens that follow `.partial_*` tokens may have `0` length to indicate that
+/// the previously partial value is completed with no additional bytes.
+/// (This can happen when the break between input buffers happens to land on the exact end of a value. E.g. `"[1234"`, `"]"`.)
+/// `.partial_*` tokens never have `0` length.
+///
+/// The recommended strategy for using the different `next*()` methods is something like this:
+///
+/// When you're expecting an object key, use `.alloc_if_needed`.
+/// You often don't need a copy of the key string to persist; you might just check which field it is.
+/// In the case that the key happens to require an allocation, free it immediately after checking it.
+///
+/// When you're expecting a meaningful string value (such as on the right of a `:`),
+/// use `.alloc_always` in order to keep the value valid throughout parsing the rest of the document.
+///
+/// When you're expecting a number value, use `.alloc_if_needed`.
+/// You're probably going to be parsing the string representation of the number into a numeric representation,
+/// so you need the complete string representation only temporarily.
+///
+/// When you're skipping an unrecognized value, use `skipValue()`.
+pub const Token = union(enum) {
+ object_begin,
+ object_end,
+ array_begin,
+ array_end,
+
+ true,
+ false,
+ null,
+
+ number: []const u8,
+ partial_number: []const u8,
+ allocated_number: []u8,
+
+ string: []const u8,
+ partial_string: []const u8,
+ partial_string_escaped_1: [1]u8,
+ partial_string_escaped_2: [2]u8,
+ partial_string_escaped_3: [3]u8,
+ partial_string_escaped_4: [4]u8,
+ allocated_string: []u8,
+
+ end_of_document,
+};
+
+/// This is only used in `peekNextTokenType()` and gives a categorization based on the first byte of the next token that will be emitted from a `next*()` call.
+pub const TokenType = enum {
+ object_begin,
+ object_end,
+ array_begin,
+ array_end,
+ true,
+ false,
+ null,
+ number,
+ string,
+ end_of_document,
+};
+
+/// To enable diagnostics, declare `var diagnostics = Diagnostics{};` then call `source.enableDiagnostics(&diagnostics);`
+/// where `source` is either a `std.json.Reader` or a `std.json.Scanner` that has just been initialized.
+/// At any time, notably just after an error, call `getLine()`, `getColumn()`, and/or `getByteOffset()`
+/// to get meaningful information from this.
+pub const Diagnostics = struct {
+ line_number: u64 = 1,
+ line_start_cursor: usize = @bitCast(usize, @as(isize, -1)), // Start just "before" the input buffer to get a 1-based column for line 1.
+ total_bytes_before_current_input: u64 = 0,
+ cursor_pointer: *const usize = undefined,
+
+ /// Starts at 1.
+ pub fn getLine(self: *const @This()) u64 {
+ return self.line_number;
+ }
+ /// Starts at 1.
+ pub fn getColumn(self: *const @This()) u64 {
+ return self.cursor_pointer.* -% self.line_start_cursor;
+ }
+ /// Starts at 0. Measures the byte offset since the start of the input.
+ pub fn getByteOffset(self: *const @This()) u64 {
+ return self.total_bytes_before_current_input + self.cursor_pointer.*;
+ }
+};
+
+/// See the documentation for `std.json.Token`.
+pub const AllocWhen = enum { alloc_if_needed, alloc_always };
+
+/// For security, the maximum size allocated to store a single string or number value is limited to 4MiB by default.
+/// This limit can be specified by calling `nextAllocMax()` instead of `nextAlloc()`.
+pub const default_max_value_len = 4 * 1024 * 1024;
+
+/// Connects a `std.io.Reader` to a `std.json.Scanner`.
+/// All `next*()` methods here handle `error.BufferUnderrun` from `std.json.Scanner`, and then read from the reader.
+pub fn Reader(comptime buffer_size: usize, comptime ReaderType: type) type {
+ return struct {
+ scanner: Scanner,
+ reader: ReaderType,
+
+ buffer: [buffer_size]u8 = undefined,
+
+ /// The allocator is only used to track `[]` and `{}` nesting levels.
+ pub fn init(allocator: Allocator, io_reader: ReaderType) @This() {
+ return .{
+ .scanner = Scanner.initStreaming(allocator),
+ .reader = io_reader,
+ };
+ }
+ pub fn deinit(self: *@This()) void {
+ self.scanner.deinit();
+ self.* = undefined;
+ }
+
+ /// Calls `std.json.Scanner.enableDiagnostics`.
+ pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void {
+ self.scanner.enableDiagnostics(diagnostics);
+ }
+
+ pub const NextError = ReaderType.Error || Error || Allocator.Error;
+ pub const SkipError = NextError;
+ pub const AllocError = NextError || error{ValueTooLong};
+ pub const PeekError = ReaderType.Error || Error;
+
+ /// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);`
+ /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
+ pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) AllocError!Token {
+ return self.nextAllocMax(allocator, when, default_max_value_len);
+ }
+ /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
+ pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token {
+ const token_type = try self.peekNextTokenType();
+ switch (token_type) {
+ .number, .string => {
+ var value_list = ArrayList(u8).init(allocator);
+ errdefer {
+ value_list.deinit();
+ }
+ if (try self.allocNextIntoArrayListMax(&value_list, when, max_value_len)) |slice| {
+ return if (token_type == .number)
+ Token{ .number = slice }
+ else
+ Token{ .string = slice };
+ } else {
+ return if (token_type == .number)
+ Token{ .allocated_number = try value_list.toOwnedSlice() }
+ else
+ Token{ .allocated_string = try value_list.toOwnedSlice() };
+ }
+ },
+
+ // Simple tokens never alloc.
+ .object_begin,
+ .object_end,
+ .array_begin,
+ .array_end,
+ .true,
+ .false,
+ .null,
+ .end_of_document,
+ => return try self.next(),
+ }
+ }
+
+ /// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);`
+ pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) AllocError!?[]const u8 {
+ return self.allocNextIntoArrayListMax(value_list, when, default_max_value_len);
+ }
+ /// Calls `std.json.Scanner.allocNextIntoArrayListMax` and handles `error.BufferUnderrun`.
+ pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocError!?[]const u8 {
+ while (true) {
+ return self.scanner.allocNextIntoArrayListMax(value_list, when, max_value_len) catch |err| switch (err) {
+ error.BufferUnderrun => {
+ try self.refillBuffer();
+ continue;
+ },
+ else => |other_err| return other_err,
+ };
+ }
+ }
+
+ /// Like `std.json.Scanner.skipValue`, but handles `error.BufferUnderrun`.
+ pub fn skipValue(self: *@This()) SkipError!void {
+ switch (try self.peekNextTokenType()) {
+ .object_begin, .array_begin => {
+ try self.skipUntilStackHeight(self.stackHeight());
+ },
+ .number, .string => {
+ while (true) {
+ switch (try self.next()) {
+ .partial_number,
+ .partial_string,
+ .partial_string_escaped_1,
+ .partial_string_escaped_2,
+ .partial_string_escaped_3,
+ .partial_string_escaped_4,
+ => continue,
+
+ .number, .string => break,
+
+ else => unreachable,
+ }
+ }
+ },
+ .true, .false, .null => {
+ _ = try self.next();
+ },
+
+ .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token.
+ }
+ }
+ /// Like `std.json.Scanner.skipUntilStackHeight()` but handles `error.BufferUnderrun`.
+ pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: u32) NextError!void {
+ while (true) {
+ return self.scanner.skipUntilStackHeight(terminal_stack_height) catch |err| switch (err) {
+ error.BufferUnderrun => {
+ try self.refillBuffer();
+ continue;
+ },
+ else => |other_err| return other_err,
+ };
+ }
+ }
+
+ /// Calls `std.json.Scanner.stackHeight`.
+ pub fn stackHeight(self: *const @This()) u32 {
+ return self.scanner.stackHeight();
+ }
+ /// Calls `std.json.Scanner.ensureTotalStackCapacity`.
+ pub fn ensureTotalStackCapacity(self: *@This(), height: u32) Allocator.Error!void {
+ try self.scanner.ensureTotalStackCapacity(height);
+ }
+
+ /// See `std.json.Token` for documentation of this function.
+ pub fn next(self: *@This()) NextError!Token {
+ while (true) {
+ return self.scanner.next() catch |err| switch (err) {
+ error.BufferUnderrun => {
+ try self.refillBuffer();
+ continue;
+ },
+ else => |other_err| return other_err,
+ };
+ }
+ }
+
+ /// See `std.json.Scanner.peekNextTokenType()`.
+ pub fn peekNextTokenType(self: *@This()) PeekError!TokenType {
+ while (true) {
+ return self.scanner.peekNextTokenType() catch |err| switch (err) {
+ error.BufferUnderrun => {
+ try self.refillBuffer();
+ continue;
+ },
+ else => |other_err| return other_err,
+ };
+ }
+ }
+
+ fn refillBuffer(self: *@This()) ReaderType.Error!void {
+ const input = self.buffer[0..try self.reader.read(self.buffer[0..])];
+ if (input.len > 0) {
+ self.scanner.feedInput(input);
+ } else {
+ self.scanner.endInput();
+ }
+ }
+ };
+}
+
+/// The lowest level parsing API in this package;
+/// supports streaming input with a low memory footprint.
+/// The memory requirement is `O(d)` where d is the nesting depth of `[]` or `{}` containers in the input.
+/// Specifically `d/8` bytes are required for this purpose,
+/// with some extra buffer according to the implementation of `std.ArrayList`.
+///
+/// This scanner can emit partial tokens; see `std.json.Token`.
+/// The input to this class is a sequence of input buffers that you must supply one at a time.
+/// Call `feedInput()` with the first buffer, then call `next()` repeatedly until `error.BufferUnderrun` is returned.
+/// Then call `feedInput()` again and so forth.
+/// Call `endInput()` when the last input buffer has been given to `feedInput()`, either immediately after calling `feedInput()`,
+/// or when `error.BufferUnderrun` requests more data and there is no more.
+/// Be sure to call `next()` after calling `endInput()` until `Token.end_of_document` has been returned.
+pub const Scanner = struct {
+ state: State = .value,
+ string_is_object_key: bool = false,
+ stack: BitStack,
+ value_start: usize = undefined,
+ unicode_code_point: u21 = undefined,
+
+ input: []const u8 = "",
+ cursor: usize = 0,
+ is_end_of_input: bool = false,
+ diagnostics: ?*Diagnostics = null,
+
+ /// The allocator is only used to track `[]` and `{}` nesting levels.
+ pub fn initStreaming(allocator: Allocator) @This() {
+ return .{
+ .stack = BitStack.init(allocator),
+ };
+ }
+ /// Use this if your input is a single slice.
+ /// This is effectively equivalent to:
+ /// ```
+ /// initStreaming(allocator);
+ /// feedInput(complete_input);
+ /// endInput();
+ /// ```
+ pub fn initCompleteInput(allocator: Allocator, complete_input: []const u8) @This() {
+ return .{
+ .stack = BitStack.init(allocator),
+ .input = complete_input,
+ .is_end_of_input = true,
+ };
+ }
+ pub fn deinit(self: *@This()) void {
+ self.stack.deinit();
+ self.* = undefined;
+ }
+
+ pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void {
+ diagnostics.cursor_pointer = &self.cursor;
+ self.diagnostics = diagnostics;
+ }
+
+ /// Call this whenever you get `error.BufferUnderrun` from `next()`.
+ /// When there is no more input to provide, call `endInput()`.
+ pub fn feedInput(self: *@This(), input: []const u8) void {
+ assert(self.cursor == self.input.len); // Not done with the last input slice.
+ if (self.diagnostics) |diag| {
+ diag.total_bytes_before_current_input += self.input.len;
+ // This usually goes "negative" to measure how far before the beginning
+ // of the new buffer the current line started.
+ diag.line_start_cursor -%= self.cursor;
+ }
+ self.input = input;
+ self.cursor = 0;
+ self.value_start = 0;
+ }
+ /// Call this when you will no longer call `feedInput()` anymore.
+ /// This can be called either immediately after the last `feedInput()`,
+ /// or at any time afterward, such as when getting `error.BufferUnderrun` from `next()`.
+ /// Don't forget to call `next*()` after `endInput()` until you get `.end_of_document`.
+ pub fn endInput(self: *@This()) void {
+ self.is_end_of_input = true;
+ }
+
+ pub const NextError = Error || Allocator.Error || error{BufferUnderrun};
+ pub const AllocError = Error || Allocator.Error || error{ValueTooLong};
+ pub const PeekError = Error || error{BufferUnderrun};
+ pub const SkipError = Error || Allocator.Error;
+ pub const AllocIntoArrayListError = AllocError || error{BufferUnderrun};
+
+ /// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);`
+ /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
+ /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
+ pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) AllocError!Token {
+ return self.nextAllocMax(allocator, when, default_max_value_len);
+ }
+
+ /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
+ /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
+ pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token {
+ assert(self.is_end_of_input); // This function is not available in streaming mode.
+ const token_type = self.peekNextTokenType() catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ };
+ switch (token_type) {
+ .number, .string => {
+ var value_list = ArrayList(u8).init(allocator);
+ errdefer {
+ value_list.deinit();
+ }
+ if (self.allocNextIntoArrayListMax(&value_list, when, max_value_len) catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ }) |slice| {
+ return if (token_type == .number)
+ Token{ .number = slice }
+ else
+ Token{ .string = slice };
+ } else {
+ return if (token_type == .number)
+ Token{ .allocated_number = try value_list.toOwnedSlice() }
+ else
+ Token{ .allocated_string = try value_list.toOwnedSlice() };
+ }
+ },
+
+ // Simple tokens never alloc.
+ .object_begin,
+ .object_end,
+ .array_begin,
+ .array_end,
+ .true,
+ .false,
+ .null,
+ .end_of_document,
+ => return self.next() catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ },
+ }
+ }
+
+ /// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);`
+ pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) AllocIntoArrayListError!?[]const u8 {
+ return self.allocNextIntoArrayListMax(value_list, when, default_max_value_len);
+ }
+ /// The next token type must be either `.number` or `.string`. See `peekNextTokenType()`.
+ /// With `.alloc_if_needed`, if the whole value is contiguous in the input buffer,
+ /// the content slice is returned directly and `value_list` is left untouched.
+ /// Otherwise (or always with `.alloc_always`), partial tokens are concatenated into
+ /// `value_list`, and `null` is returned once the final `.number` or `.string` token
+ /// has been appended to it.
+ /// On `error.BufferUnderrun`, whatever has accumulated stays in `value_list`;
+ /// the list is never reset here, so the operation can be resumed by calling
+ /// this method again with the same list.
+ /// The caller is expected to know whether a `.number` or a `.string` is being read;
+ /// this method does not report which one it produced.
+ pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocIntoArrayListError!?[]const u8 {
+ while (true) {
+ switch (try self.next()) {
+ // Partial values accumulate until the terminating token arrives.
+ .partial_number, .partial_string => |slice| {
+ try appendSlice(value_list, slice, max_value_len);
+ },
+ .partial_string_escaped_1 => |buf| {
+ try appendSlice(value_list, buf[0..], max_value_len);
+ },
+ .partial_string_escaped_2 => |buf| {
+ try appendSlice(value_list, buf[0..], max_value_len);
+ },
+ .partial_string_escaped_3 => |buf| {
+ try appendSlice(value_list, buf[0..], max_value_len);
+ },
+ .partial_string_escaped_4 => |buf| {
+ try appendSlice(value_list, buf[0..], max_value_len);
+ },
+
+ // A complete value ends the loop. Both payloads are []const u8.
+ .number, .string => |slice| {
+ if (when == .alloc_if_needed and value_list.items.len == 0) {
+ // The entire value was contiguous in the input; no allocation needed.
+ return slice;
+ }
+ try appendSlice(value_list, slice, max_value_len);
+ return null;
+ },
+
+ .object_begin,
+ .object_end,
+ .array_begin,
+ .array_end,
+ .true,
+ .false,
+ .null,
+ .end_of_document,
+ => unreachable, // Only .number and .string token types are allowed here. Check peekNextTokenType() before calling this.
+
+ .allocated_number, .allocated_string => unreachable,
+ }
+ }
+ }
+
+ /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
+ /// If the next token type is `.object_begin` or `.array_begin`,
+ /// this function calls `next()` repeatedly until the corresponding `.object_end` or `.array_end` is found.
+ /// If the next token type is `.number` or `.string`,
+ /// this function calls `next()` repeatedly until the (non `.partial_*`) `.number` or `.string` token is found.
+ /// If the next token type is `.true`, `.false`, or `.null`, this function calls `next()` once.
+ /// The next token type must not be `.object_end`, `.array_end`, or `.end_of_document`;
+ /// see `peekNextTokenType()`.
+ pub fn skipValue(self: *@This()) SkipError!void {
+ assert(self.is_end_of_input); // This function is not available in streaming mode.
+ // With the complete input buffered, BufferUnderrun can never happen below.
+ switch (self.peekNextTokenType() catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ }) {
+ .object_begin, .array_begin => {
+ // peek has not consumed the '{'/'[' yet, so the matching close
+ // bracket pops the stack back to exactly the current height.
+ self.skipUntilStackHeight(self.stackHeight()) catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ };
+ },
+ .number, .string => {
+ // Drain partial tokens until the complete value token arrives.
+ while (true) {
+ switch (self.next() catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ }) {
+ .partial_number,
+ .partial_string,
+ .partial_string_escaped_1,
+ .partial_string_escaped_2,
+ .partial_string_escaped_3,
+ .partial_string_escaped_4,
+ => continue,
+
+ .number, .string => break,
+
+ else => unreachable,
+ }
+ }
+ },
+ .true, .false, .null => {
+ _ = self.next() catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ };
+ },
+
+ .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token.
+ }
+ }
+
+ /// Consume tokens until an `.object_end` or `.array_end` token brings `stackHeight()`
+ /// down to the given height. Unlike `skipValue()`, this works in streaming mode.
+ pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: u32) NextError!void {
+ while (true) {
+ const token = try self.next();
+ if (token == .end_of_document) unreachable;
+ if (token != .object_end and token != .array_end) continue;
+ if (self.stackHeight() == terminal_stack_height) return;
+ }
+ }
+
+ /// The depth of `{}` or `[]` nesting levels at the current position.
+ pub fn stackHeight(self: *const @This()) u32 {
+ // Each nesting level is one bit on the object/array mode stack.
+ return self.stack.bit_len;
+ }
+
+ /// Pre-allocate memory to hold the given number of nesting levels.
+ /// `stackHeight()` up to the given number will not cause allocations.
+ pub fn ensureTotalStackCapacity(self: *@This(), height: u32) Allocator.Error!void {
+ try self.stack.ensureTotalCapacity(height);
+ }
+
+ /// See `std.json.Token` for documentation of this function.
+ /// This is the tokenizer's core state machine. Each pass through `state_loop`
+ /// either consumes bytes and transitions `self.state`, returns a token, or
+ /// returns an error (`error.BufferUnderrun` means: feed more input and retry).
+ pub fn next(self: *@This()) NextError!Token {
+ state_loop: while (true) {
+ switch (self.state) {
+ // Expecting the first byte of a value.
+ .value => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ // Object, Array
+ '{' => {
+ try self.stack.push(OBJECT_MODE);
+ self.cursor += 1;
+ self.state = .object_start;
+ return .object_begin;
+ },
+ '[' => {
+ try self.stack.push(ARRAY_MODE);
+ self.cursor += 1;
+ self.state = .array_start;
+ return .array_begin;
+ },
+
+ // String
+ '"' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ continue :state_loop;
+ },
+
+ // Number
+ '1'...'9' => {
+ self.value_start = self.cursor;
+ self.cursor += 1;
+ self.state = .number_int;
+ continue :state_loop;
+ },
+ '0' => {
+ self.value_start = self.cursor;
+ self.cursor += 1;
+ self.state = .number_leading_zero;
+ continue :state_loop;
+ },
+ '-' => {
+ self.value_start = self.cursor;
+ self.cursor += 1;
+ self.state = .number_minus;
+ continue :state_loop;
+ },
+
+ // literal values
+ 't' => {
+ self.cursor += 1;
+ self.state = .literal_t;
+ continue :state_loop;
+ },
+ 'f' => {
+ self.cursor += 1;
+ self.state = .literal_f;
+ continue :state_loop;
+ },
+ 'n' => {
+ self.cursor += 1;
+ self.state = .literal_n;
+ continue :state_loop;
+ },
+
+ else => return error.SyntaxError,
+ }
+ },
+
+ // A value just ended; expect ',', ':', '}', ']', or the end of the document.
+ .post_value => {
+ if (try self.skipWhitespaceCheckEnd()) return .end_of_document;
+
+ const c = self.input[self.cursor];
+ if (self.string_is_object_key) {
+ // An object key must be followed by ':'.
+ self.string_is_object_key = false;
+ switch (c) {
+ ':' => {
+ self.cursor += 1;
+ self.state = .value;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ }
+
+ switch (c) {
+ '}' => {
+ if (self.stack.pop() != OBJECT_MODE) return error.SyntaxError;
+ self.cursor += 1;
+ // stay in .post_value state.
+ return .object_end;
+ },
+ ']' => {
+ if (self.stack.pop() != ARRAY_MODE) return error.SyntaxError;
+ self.cursor += 1;
+ // stay in .post_value state.
+ return .array_end;
+ },
+ ',' => {
+ // After a comma, an object expects a key string; an array expects a value.
+ switch (self.stack.peek()) {
+ OBJECT_MODE => {
+ self.state = .object_post_comma;
+ },
+ ARRAY_MODE => {
+ self.state = .value;
+ },
+ }
+ self.cursor += 1;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+
+ .object_start => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ '"' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ self.string_is_object_key = true;
+ continue :state_loop;
+ },
+ '}' => {
+ // Empty object.
+ self.cursor += 1;
+ _ = self.stack.pop();
+ self.state = .post_value;
+ return .object_end;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .object_post_comma => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ '"' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ self.string_is_object_key = true;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+
+ .array_start => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ ']' => {
+ // Empty array.
+ self.cursor += 1;
+ _ = self.stack.pop();
+ self.state = .post_value;
+ return .array_end;
+ },
+ else => {
+ self.state = .value;
+ continue :state_loop;
+ },
+ }
+ },
+
+ .number_minus => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
+ switch (self.input[self.cursor]) {
+ '0' => {
+ self.cursor += 1;
+ self.state = .number_leading_zero;
+ continue :state_loop;
+ },
+ '1'...'9' => {
+ self.cursor += 1;
+ self.state = .number_int;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .number_leading_zero => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInNumber(true);
+ switch (self.input[self.cursor]) {
+ '.' => {
+ self.cursor += 1;
+ self.state = .number_post_dot;
+ continue :state_loop;
+ },
+ 'e', 'E' => {
+ self.cursor += 1;
+ self.state = .number_post_e;
+ continue :state_loop;
+ },
+ else => {
+ // A lone "0" (or "-0") is a complete number; "01" is not reachable here.
+ self.state = .post_value;
+ return Token{ .number = self.takeValueSlice() };
+ },
+ }
+ },
+ .number_int => {
+ while (self.cursor < self.input.len) : (self.cursor += 1) {
+ switch (self.input[self.cursor]) {
+ '0'...'9' => continue,
+ '.' => {
+ self.cursor += 1;
+ self.state = .number_post_dot;
+ continue :state_loop;
+ },
+ 'e', 'E' => {
+ self.cursor += 1;
+ self.state = .number_post_e;
+ continue :state_loop;
+ },
+ else => {
+ self.state = .post_value;
+ return Token{ .number = self.takeValueSlice() };
+ },
+ }
+ }
+ return self.endOfBufferInNumber(true);
+ },
+ .number_post_dot => {
+ // NOTE(review): the bounds check above makes expectByte() infallible here;
+ // sibling number states index self.input directly instead.
+ if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
+ switch (try self.expectByte()) {
+ '0'...'9' => {
+ self.cursor += 1;
+ self.state = .number_frac;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .number_frac => {
+ while (self.cursor < self.input.len) : (self.cursor += 1) {
+ switch (self.input[self.cursor]) {
+ '0'...'9' => continue,
+ 'e', 'E' => {
+ self.cursor += 1;
+ self.state = .number_post_e;
+ continue :state_loop;
+ },
+ else => {
+ self.state = .post_value;
+ return Token{ .number = self.takeValueSlice() };
+ },
+ }
+ }
+ return self.endOfBufferInNumber(true);
+ },
+ .number_post_e => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
+ switch (self.input[self.cursor]) {
+ '0'...'9' => {
+ self.cursor += 1;
+ self.state = .number_exp;
+ continue :state_loop;
+ },
+ '+', '-' => {
+ self.cursor += 1;
+ self.state = .number_post_e_sign;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .number_post_e_sign => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
+ switch (self.input[self.cursor]) {
+ '0'...'9' => {
+ self.cursor += 1;
+ self.state = .number_exp;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .number_exp => {
+ while (self.cursor < self.input.len) : (self.cursor += 1) {
+ switch (self.input[self.cursor]) {
+ '0'...'9' => continue,
+ else => {
+ self.state = .post_value;
+ return Token{ .number = self.takeValueSlice() };
+ },
+ }
+ }
+ return self.endOfBufferInNumber(true);
+ },
+
+ .string => {
+ while (self.cursor < self.input.len) : (self.cursor += 1) {
+ switch (self.input[self.cursor]) {
+ 0...0x1f => return error.SyntaxError, // Bare ASCII control code in string.
+
+ // ASCII plain text.
+ 0x20...('"' - 1), ('"' + 1)...('\\' - 1), ('\\' + 1)...0x7F => continue,
+
+ // Special characters.
+ '"' => {
+ const result = Token{ .string = self.takeValueSlice() };
+ self.cursor += 1;
+ self.state = .post_value;
+ return result;
+ },
+ '\\' => {
+ // Flush the plaintext run before decoding the escape sequence.
+ const slice = self.takeValueSlice();
+ self.cursor += 1;
+ self.state = .string_backslash;
+ if (slice.len > 0) return Token{ .partial_string = slice };
+ continue :state_loop;
+ },
+
+ // UTF-8 validation.
+ // See http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String
+ 0xC2...0xDF => {
+ self.cursor += 1;
+ self.state = .string_utf8_last_byte;
+ continue :state_loop;
+ },
+ 0xE0 => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte_guard_against_overlong;
+ continue :state_loop;
+ },
+ 0xE1...0xEC, 0xEE...0xEF => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte;
+ continue :state_loop;
+ },
+ 0xED => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte_guard_against_surrogate_half;
+ continue :state_loop;
+ },
+ 0xF0 => {
+ self.cursor += 1;
+ self.state = .string_utf8_third_to_last_byte_guard_against_overlong;
+ continue :state_loop;
+ },
+ 0xF1...0xF3 => {
+ self.cursor += 1;
+ self.state = .string_utf8_third_to_last_byte;
+ continue :state_loop;
+ },
+ 0xF4 => {
+ self.cursor += 1;
+ self.state = .string_utf8_third_to_last_byte_guard_against_too_large;
+ continue :state_loop;
+ },
+ 0x80...0xC1, 0xF5...0xFF => return error.SyntaxError, // Invalid UTF-8.
+ }
+ }
+ if (self.is_end_of_input) return error.UnexpectedEndOfInput;
+ const slice = self.takeValueSlice();
+ if (slice.len > 0) return Token{ .partial_string = slice };
+ return error.BufferUnderrun;
+ },
+ .string_backslash => {
+ switch (try self.expectByte()) {
+ '"', '\\', '/' => {
+ // Since these characters now represent themselves literally,
+ // we can simply begin the next plaintext slice here.
+ self.value_start = self.cursor;
+ self.cursor += 1;
+ self.state = .string;
+ continue :state_loop;
+ },
+ 'b' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ return Token{ .partial_string_escaped_1 = [_]u8{0x08} };
+ },
+ 'f' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ return Token{ .partial_string_escaped_1 = [_]u8{0x0c} };
+ },
+ 'n' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ return Token{ .partial_string_escaped_1 = [_]u8{'\n'} };
+ },
+ 'r' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ return Token{ .partial_string_escaped_1 = [_]u8{'\r'} };
+ },
+ 't' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ return Token{ .partial_string_escaped_1 = [_]u8{'\t'} };
+ },
+ 'u' => {
+ self.cursor += 1;
+ self.state = .string_backslash_u;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ // Four hex digits follow "\u"; the next four states accumulate them
+ // into unicode_code_point, most significant nibble first.
+ .string_backslash_u => {
+ const c = try self.expectByte();
+ switch (c) {
+ '0'...'9' => {
+ self.unicode_code_point = @as(u21, c - '0') << 12;
+ },
+ 'A'...'F' => {
+ self.unicode_code_point = @as(u21, c - 'A' + 10) << 12;
+ },
+ 'a'...'f' => {
+ self.unicode_code_point = @as(u21, c - 'a' + 10) << 12;
+ },
+ else => return error.SyntaxError,
+ }
+ self.cursor += 1;
+ self.state = .string_backslash_u_1;
+ continue :state_loop;
+ },
+ .string_backslash_u_1 => {
+ const c = try self.expectByte();
+ switch (c) {
+ '0'...'9' => {
+ self.unicode_code_point |= @as(u21, c - '0') << 8;
+ },
+ 'A'...'F' => {
+ self.unicode_code_point |= @as(u21, c - 'A' + 10) << 8;
+ },
+ 'a'...'f' => {
+ self.unicode_code_point |= @as(u21, c - 'a' + 10) << 8;
+ },
+ else => return error.SyntaxError,
+ }
+ self.cursor += 1;
+ self.state = .string_backslash_u_2;
+ continue :state_loop;
+ },
+ .string_backslash_u_2 => {
+ const c = try self.expectByte();
+ switch (c) {
+ '0'...'9' => {
+ self.unicode_code_point |= @as(u21, c - '0') << 4;
+ },
+ 'A'...'F' => {
+ self.unicode_code_point |= @as(u21, c - 'A' + 10) << 4;
+ },
+ 'a'...'f' => {
+ self.unicode_code_point |= @as(u21, c - 'a' + 10) << 4;
+ },
+ else => return error.SyntaxError,
+ }
+ self.cursor += 1;
+ self.state = .string_backslash_u_3;
+ continue :state_loop;
+ },
+ .string_backslash_u_3 => {
+ const c = try self.expectByte();
+ switch (c) {
+ '0'...'9' => {
+ self.unicode_code_point |= c - '0';
+ },
+ 'A'...'F' => {
+ self.unicode_code_point |= c - 'A' + 10;
+ },
+ 'a'...'f' => {
+ self.unicode_code_point |= c - 'a' + 10;
+ },
+ else => return error.SyntaxError,
+ }
+ self.cursor += 1;
+ switch (self.unicode_code_point) {
+ 0xD800...0xDBFF => {
+ // High surrogate half.
+ // In u21 the 0xD800 tag bits are shifted out, so this keeps the
+ // high half's 10 payload bits (shifted into place) plus the
+ // 0x10000 supplementary-plane offset; the low half's 10 bits
+ // are OR'd in by the states below.
+ self.unicode_code_point = 0x10000 | (self.unicode_code_point << 10);
+ self.state = .string_surrogate_half;
+ continue :state_loop;
+ },
+ 0xDC00...0xDFFF => return error.SyntaxError, // Unexpected low surrogate half.
+ else => {
+ // Code point from a single UTF-16 code unit.
+ self.value_start = self.cursor;
+ self.state = .string;
+ return self.partialStringCodepoint();
+ },
+ }
+ },
+ .string_surrogate_half => {
+ switch (try self.expectByte()) {
+ '\\' => {
+ self.cursor += 1;
+ self.state = .string_surrogate_half_backslash;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Expected low surrogate half.
+ }
+ },
+ .string_surrogate_half_backslash => {
+ switch (try self.expectByte()) {
+ 'u' => {
+ self.cursor += 1;
+ self.state = .string_surrogate_half_backslash_u;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Expected low surrogate half.
+ }
+ },
+ .string_surrogate_half_backslash_u => {
+ // Low surrogate halves are 0xDC00-0xDFFF, so the first hex digit must be 'D'/'d'.
+ switch (try self.expectByte()) {
+ 'D', 'd' => {
+ self.cursor += 1;
+ self.state = .string_surrogate_half_backslash_u_1;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Expected low surrogate half.
+ }
+ },
+ .string_surrogate_half_backslash_u_1 => {
+ const c = try self.expectByte();
+ switch (c) {
+ 'C'...'F' => {
+ self.cursor += 1;
+ self.unicode_code_point |= @as(u21, c - 'C') << 8;
+ self.state = .string_surrogate_half_backslash_u_2;
+ continue :state_loop;
+ },
+ 'c'...'f' => {
+ self.cursor += 1;
+ self.unicode_code_point |= @as(u21, c - 'c') << 8;
+ self.state = .string_surrogate_half_backslash_u_2;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Expected low surrogate half.
+ }
+ },
+ .string_surrogate_half_backslash_u_2 => {
+ const c = try self.expectByte();
+ switch (c) {
+ '0'...'9' => {
+ self.cursor += 1;
+ self.unicode_code_point |= @as(u21, c - '0') << 4;
+ self.state = .string_surrogate_half_backslash_u_3;
+ continue :state_loop;
+ },
+ 'A'...'F' => {
+ self.cursor += 1;
+ self.unicode_code_point |= @as(u21, c - 'A' + 10) << 4;
+ self.state = .string_surrogate_half_backslash_u_3;
+ continue :state_loop;
+ },
+ 'a'...'f' => {
+ self.cursor += 1;
+ self.unicode_code_point |= @as(u21, c - 'a' + 10) << 4;
+ self.state = .string_surrogate_half_backslash_u_3;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .string_surrogate_half_backslash_u_3 => {
+ const c = try self.expectByte();
+ switch (c) {
+ '0'...'9' => {
+ self.unicode_code_point |= c - '0';
+ },
+ 'A'...'F' => {
+ self.unicode_code_point |= c - 'A' + 10;
+ },
+ 'a'...'f' => {
+ self.unicode_code_point |= c - 'a' + 10;
+ },
+ else => return error.SyntaxError,
+ }
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ return self.partialStringCodepoint();
+ },
+
+ .string_utf8_last_byte => {
+ switch (try self.expectByte()) {
+ 0x80...0xBF => {
+ self.cursor += 1;
+ self.state = .string;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_second_to_last_byte => {
+ switch (try self.expectByte()) {
+ 0x80...0xBF => {
+ self.cursor += 1;
+ self.state = .string_utf8_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_second_to_last_byte_guard_against_overlong => {
+ switch (try self.expectByte()) {
+ 0xA0...0xBF => {
+ self.cursor += 1;
+ self.state = .string_utf8_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_second_to_last_byte_guard_against_surrogate_half => {
+ switch (try self.expectByte()) {
+ 0x80...0x9F => {
+ self.cursor += 1;
+ self.state = .string_utf8_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_third_to_last_byte => {
+ switch (try self.expectByte()) {
+ 0x80...0xBF => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_third_to_last_byte_guard_against_overlong => {
+ switch (try self.expectByte()) {
+ 0x90...0xBF => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_third_to_last_byte_guard_against_too_large => {
+ switch (try self.expectByte()) {
+ 0x80...0x8F => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+
+ // Matching the remaining bytes of "true", "false", "null", one byte per state.
+ .literal_t => {
+ switch (try self.expectByte()) {
+ 'r' => {
+ self.cursor += 1;
+ self.state = .literal_tr;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_tr => {
+ switch (try self.expectByte()) {
+ 'u' => {
+ self.cursor += 1;
+ self.state = .literal_tru;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_tru => {
+ switch (try self.expectByte()) {
+ 'e' => {
+ self.cursor += 1;
+ self.state = .post_value;
+ return .true;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_f => {
+ switch (try self.expectByte()) {
+ 'a' => {
+ self.cursor += 1;
+ self.state = .literal_fa;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_fa => {
+ switch (try self.expectByte()) {
+ 'l' => {
+ self.cursor += 1;
+ self.state = .literal_fal;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_fal => {
+ switch (try self.expectByte()) {
+ 's' => {
+ self.cursor += 1;
+ self.state = .literal_fals;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_fals => {
+ switch (try self.expectByte()) {
+ 'e' => {
+ self.cursor += 1;
+ self.state = .post_value;
+ return .false;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_n => {
+ switch (try self.expectByte()) {
+ 'u' => {
+ self.cursor += 1;
+ self.state = .literal_nu;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_nu => {
+ switch (try self.expectByte()) {
+ 'l' => {
+ self.cursor += 1;
+ self.state = .literal_nul;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_nul => {
+ switch (try self.expectByte()) {
+ 'l' => {
+ self.cursor += 1;
+ self.state = .post_value;
+ return .null;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ }
+ unreachable; // Every state above either returns or continues :state_loop.
+ }
+ }
+
+ /// Seeks ahead in the input until the first byte of the next token (or the end of the input)
+ /// determines which type of token will be returned from the next `next*()` call.
+ /// This function is idempotent, only advancing past commas, colons, and inter-token whitespace.
+ pub fn peekNextTokenType(self: *@This()) PeekError!TokenType {
+ state_loop: while (true) {
+ switch (self.state) {
+ .value => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ '{' => return .object_begin,
+ '[' => return .array_begin,
+ '"' => return .string,
+ '-', '0'...'9' => return .number,
+ 't' => return .true,
+ 'f' => return .false,
+ 'n' => return .null,
+ else => return error.SyntaxError,
+ }
+ },
+
+ // Like next(), this consumes the ':' after an object key and the ','
+ // between values, so it transitions self.state; the peeked token itself
+ // is left unconsumed.
+ .post_value => {
+ if (try self.skipWhitespaceCheckEnd()) return .end_of_document;
+
+ const c = self.input[self.cursor];
+ if (self.string_is_object_key) {
+ self.string_is_object_key = false;
+ switch (c) {
+ ':' => {
+ self.cursor += 1;
+ self.state = .value;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ }
+
+ switch (c) {
+ '}' => return .object_end,
+ ']' => return .array_end,
+ ',' => {
+ switch (self.stack.peek()) {
+ OBJECT_MODE => {
+ self.state = .object_post_comma;
+ },
+ ARRAY_MODE => {
+ self.state = .value;
+ },
+ }
+ self.cursor += 1;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+
+ .object_start => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ '"' => return .string,
+ '}' => return .object_end,
+ else => return error.SyntaxError,
+ }
+ },
+ .object_post_comma => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ '"' => return .string,
+ else => return error.SyntaxError,
+ }
+ },
+
+ .array_start => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ ']' => return .array_end,
+ else => {
+ self.state = .value;
+ continue :state_loop;
+ },
+ }
+ },
+
+ // Mid-token states (streaming mode): the token type is already known.
+ .number_minus,
+ .number_leading_zero,
+ .number_int,
+ .number_post_dot,
+ .number_frac,
+ .number_post_e,
+ .number_post_e_sign,
+ .number_exp,
+ => return .number,
+
+ .string,
+ .string_backslash,
+ .string_backslash_u,
+ .string_backslash_u_1,
+ .string_backslash_u_2,
+ .string_backslash_u_3,
+ .string_surrogate_half,
+ .string_surrogate_half_backslash,
+ .string_surrogate_half_backslash_u,
+ .string_surrogate_half_backslash_u_1,
+ .string_surrogate_half_backslash_u_2,
+ .string_surrogate_half_backslash_u_3,
+ => return .string,
+
+ .string_utf8_last_byte,
+ .string_utf8_second_to_last_byte,
+ .string_utf8_second_to_last_byte_guard_against_overlong,
+ .string_utf8_second_to_last_byte_guard_against_surrogate_half,
+ .string_utf8_third_to_last_byte,
+ .string_utf8_third_to_last_byte_guard_against_overlong,
+ .string_utf8_third_to_last_byte_guard_against_too_large,
+ => return .string,
+
+ .literal_t,
+ .literal_tr,
+ .literal_tru,
+ => return .true,
+ .literal_f,
+ .literal_fa,
+ .literal_fal,
+ .literal_fals,
+ => return .false,
+ .literal_n,
+ .literal_nu,
+ .literal_nul,
+ => return .null,
+ }
+ unreachable;
+ }
+ }
+
+ /// Tokenizer state: which production the scanner is in the middle of.
+ /// Multi-byte tokens get one state per position so that scanning can be
+ /// suspended (error.BufferUnderrun) and resumed at any byte boundary.
+ const State = enum {
+ value,
+ post_value,
+
+ object_start,
+ object_post_comma,
+
+ array_start,
+
+ number_minus,
+ number_leading_zero,
+ number_int,
+ number_post_dot,
+ number_frac,
+ number_post_e,
+ number_post_e_sign,
+ number_exp,
+
+ string,
+ string_backslash,
+ string_backslash_u,
+ string_backslash_u_1,
+ string_backslash_u_2,
+ string_backslash_u_3,
+ string_surrogate_half,
+ string_surrogate_half_backslash,
+ string_surrogate_half_backslash_u,
+ string_surrogate_half_backslash_u_1,
+ string_surrogate_half_backslash_u_2,
+ string_surrogate_half_backslash_u_3,
+
+ // From http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String
+ string_utf8_last_byte, // State A
+ string_utf8_second_to_last_byte, // State B
+ string_utf8_second_to_last_byte_guard_against_overlong, // State C
+ string_utf8_second_to_last_byte_guard_against_surrogate_half, // State D
+ string_utf8_third_to_last_byte, // State E
+ string_utf8_third_to_last_byte_guard_against_overlong, // State F
+ string_utf8_third_to_last_byte_guard_against_too_large, // State G
+
+ literal_t,
+ literal_tr,
+ literal_tru,
+ literal_f,
+ literal_fa,
+ literal_fal,
+ literal_fals,
+ literal_n,
+ literal_nu,
+ literal_nul,
+ };
+
+ /// Peek the byte at the cursor without consuming it.
+ /// When the buffer is exhausted, errors with `UnexpectedEndOfInput` if the
+ /// input is complete, or `BufferUnderrun` if more input may still arrive.
+ fn expectByte(self: *const @This()) !u8 {
+ if (self.cursor >= self.input.len) {
+ // Out of buffered bytes.
+ return if (self.is_end_of_input) error.UnexpectedEndOfInput else error.BufferUnderrun;
+ }
+ return self.input[self.cursor];
+ }
+
+ /// Advance the cursor past ' ', '\t', '\r', and '\n',
+ /// updating line-number diagnostics as newlines go by.
+ fn skipWhitespace(self: *@This()) void {
+ while (self.cursor < self.input.len) : (self.cursor += 1) {
+ switch (self.input[self.cursor]) {
+ // Whitespace
+ ' ', '\t', '\r' => continue,
+ '\n' => {
+ if (self.diagnostics) |diag| {
+ diag.line_number += 1;
+ // This will count the newline itself,
+ // which means a straight-forward subtraction will give a 1-based column number.
+ diag.line_start_cursor = self.cursor;
+ }
+ continue;
+ },
+ else => return,
+ }
+ }
+ }
+
+ /// Skip whitespace, then peek the next byte (see `expectByte()`).
+ fn skipWhitespaceExpectByte(self: *@This()) !u8 {
+ self.skipWhitespace();
+ return self.expectByte();
+ }
+
+ /// Skip whitespace and report whether the document is complete.
+ /// Returns true only when the buffer is exhausted, the input has been
+ /// finalized, and all containers are closed.
+ fn skipWhitespaceCheckEnd(self: *@This()) !bool {
+ self.skipWhitespace();
+ if (self.cursor >= self.input.len) {
+ // End of buffer.
+ if (self.is_end_of_input) {
+ // End of everything.
+ if (self.stackHeight() == 0) {
+ // We did it!
+ return true;
+ }
+ return error.UnexpectedEndOfInput;
+ }
+ return error.BufferUnderrun;
+ }
+ // Any non-whitespace byte after the top-level value has closed is garbage.
+ if (self.stackHeight() == 0) return error.SyntaxError;
+ return false;
+ }
+
+ /// Return the input from `value_start` up to (not including) the cursor,
+ /// and mark the cursor as the start of the next value fragment.
+ fn takeValueSlice(self: *@This()) []const u8 {
+ // The slice is computed before the defer updates value_start.
+ defer self.value_start = self.cursor;
+ return self.input[self.value_start..self.cursor];
+ }
+
+ /// Called when the input buffer runs out in the middle of a number.
+ /// `allow_end` is whether the digits seen so far form a complete number
+ /// (e.g. true after an integer digit, false right after 'e' or '-').
+ fn endOfBufferInNumber(self: *@This(), allow_end: bool) !Token {
+ const slice = self.takeValueSlice();
+ if (self.is_end_of_input) {
+ if (!allow_end) return error.UnexpectedEndOfInput;
+ self.state = .post_value;
+ return Token{ .number = slice };
+ }
+ // Streaming mode: emit what we have as a partial, or ask for more input.
+ if (slice.len == 0) return error.BufferUnderrun;
+ return Token{ .partial_number = slice };
+ }
+
+ /// Emit `self.unicode_code_point` (decoded from a \u escape) as UTF-8 bytes
+ /// in the appropriately sized `.partial_string_escaped_*` token.
+ fn partialStringCodepoint(self: *@This()) Token {
+ const code_point = self.unicode_code_point;
+ self.unicode_code_point = undefined;
+ var buf: [4]u8 = undefined;
+ // Unpaired surrogate halves were rejected earlier, so encoding cannot fail.
+ switch (std.unicode.utf8Encode(code_point, &buf) catch unreachable) {
+ 1 => return Token{ .partial_string_escaped_1 = buf[0..1].* },
+ 2 => return Token{ .partial_string_escaped_2 = buf[0..2].* },
+ 3 => return Token{ .partial_string_escaped_3 = buf[0..3].* },
+ 4 => return Token{ .partial_string_escaped_4 = buf[0..4].* },
+ else => unreachable,
+ }
+ }
+};
+
+ // Values stored on the BitStack: one bit per nesting level, recording
+ // whether the open container is an object or an array.
+ const OBJECT_MODE = 0;
+ const ARRAY_MODE = 1;
+
+ /// A stack of single bits backed by a growable byte array.
+ /// Used to record, for each open `{`/`[`, whether it is an object or an array.
+ const BitStack = struct {
+ bytes: std.ArrayList(u8),
+ bit_len: u32 = 0,
+
+ pub fn init(allocator: Allocator) @This() {
+ return .{
+ .bytes = std.ArrayList(u8).init(allocator),
+ };
+ }
+
+ pub fn deinit(self: *@This()) void {
+ self.bytes.deinit();
+ self.* = undefined;
+ }
+
+ /// Reserve space for the given number of bits so pushes up to that depth cannot fail.
+ pub fn ensureTotalCapacity(self: *@This(), bit_capacity: u32) Allocator.Error!void {
+ // Round up to whole bytes.
+ const byte_capacity = (bit_capacity + 7) >> 3;
+ try self.bytes.ensureTotalCapacity(byte_capacity);
+ }
+
+ /// Push a single bit onto the stack, growing the byte array if needed.
+ pub fn push(self: *@This(), b: u1) Allocator.Error!void {
+ const byte_index = self.bit_len >> 3;
+ const bit_index = @intCast(u3, self.bit_len & 7);
+
+ if (self.bytes.items.len <= byte_index) {
+ try self.bytes.append(0);
+ }
+
+ // Clear the slot, then store the new bit.
+ self.bytes.items[byte_index] &= ~(@as(u8, 1) << bit_index);
+ self.bytes.items[byte_index] |= @as(u8, b) << bit_index;
+
+ self.bit_len += 1;
+ }
+
+ /// Read the top bit without removing it.
+ /// `bit_len - 1` underflows (caught by safety checks) if the stack is empty.
+ pub fn peek(self: *const @This()) u1 {
+ const byte_index = (self.bit_len - 1) >> 3;
+ const bit_index = @intCast(u3, (self.bit_len - 1) & 7);
+ return @intCast(u1, (self.bytes.items[byte_index] >> bit_index) & 1);
+ }
+
+ /// Remove and return the top bit.
+ pub fn pop(self: *@This()) u1 {
+ const b = self.peek();
+ self.bit_len -= 1;
+ return b;
+ }
+ };
+
+ /// Append `buf` to `list`, erroring with `ValueTooLong` if the combined
+ /// length would exceed `max_value_len` (or overflow usize).
+ fn appendSlice(list: *std.ArrayList(u8), buf: []const u8, max_value_len: usize) !void {
+ const combined_len = std.math.add(usize, list.items.len, buf.len) catch return error.ValueTooLong;
+ if (combined_len > max_value_len) return error.ValueTooLong;
+ try list.appendSlice(buf);
+ }
+
+ /// For the slice you get from a `Token.number` or `Token.allocated_number`,
+ /// returns true if the text contains no fraction or exponent component.
+ /// Note that a value like `1.0` may still encode an integer mathematically;
+ /// this is only a hint for choosing integer vs. float parsing,
+ /// and it is not meaningful for non-numeric input.
+ pub fn isNumberFormattedLikeAnInteger(value: []const u8) bool {
+ for (value) |c| {
+ switch (c) {
+ '.', 'e', 'E' => return false,
+ else => {},
+ }
+ }
+ return true;
+ }
+
+ // Pull in the scanner tests so `zig test` on this file runs them too.
+ test {
+ _ = @import("./scanner_test.zig");
+ }
lib/std/json/scanner_test.zig
@@ -0,0 +1,466 @@
+const std = @import("std");
+const JsonScanner = @import("./scanner.zig").Scanner;
+const jsonReader = @import("./scanner.zig").reader;
+const JsonReader = @import("./scanner.zig").Reader;
+const Token = @import("./scanner.zig").Token;
+const TokenType = @import("./scanner.zig").TokenType;
+const Diagnostics = @import("./scanner.zig").Diagnostics;
+const Error = @import("./scanner.zig").Error;
+const validate = @import("./scanner.zig").validate;
+
// A small multi-line JSON document shared by several tests below.
const example_document_str =
    \\{
    \\  "Image": {
    \\      "Width":  800,
    \\      "Height": 600,
    \\      "Title":  "View from 15th Floor",
    \\      "Thumbnail": {
    \\          "Url":    "http://www.example.com/image/481989943",
    \\          "Height": 125,
    \\          "Width":  100
    \\      },
    \\      "Animated" : false,
    \\      "IDs": [116, 943, 234, 38793]
    \\    }
    \\}
;
+
/// Asserts that the next token produced by `scanner_or_reader` equals `expected_token`.
fn expectNext(scanner_or_reader: anytype, expected_token: Token) !void {
    const actual = try scanner_or_reader.next();
    return expectEqualTokens(expected_token, actual);
}
+
/// Asserts that peeking reports `expected_token_type`, and that the token then
/// returned by `next()` equals `expected_token`.
fn expectPeekNext(scanner_or_reader: anytype, expected_token_type: TokenType, expected_token: Token) !void {
    const peeked = try scanner_or_reader.peekNextTokenType();
    try std.testing.expectEqual(expected_token_type, peeked);
    const actual = try scanner_or_reader.next();
    try expectEqualTokens(expected_token, actual);
}
+
// Walks the example document token by token and checks the exact sequence.
test "json.token" {
    var scanner = JsonScanner.initCompleteInput(std.testing.allocator, example_document_str);
    defer scanner.deinit();

    try expectNext(&scanner, .object_begin);
    try expectNext(&scanner, Token{ .string = "Image" });
    try expectNext(&scanner, .object_begin);
    try expectNext(&scanner, Token{ .string = "Width" });
    try expectNext(&scanner, Token{ .number = "800" });
    try expectNext(&scanner, Token{ .string = "Height" });
    try expectNext(&scanner, Token{ .number = "600" });
    try expectNext(&scanner, Token{ .string = "Title" });
    try expectNext(&scanner, Token{ .string = "View from 15th Floor" });
    try expectNext(&scanner, Token{ .string = "Thumbnail" });
    try expectNext(&scanner, .object_begin);
    try expectNext(&scanner, Token{ .string = "Url" });
    try expectNext(&scanner, Token{ .string = "http://www.example.com/image/481989943" });
    try expectNext(&scanner, Token{ .string = "Height" });
    try expectNext(&scanner, Token{ .number = "125" });
    try expectNext(&scanner, Token{ .string = "Width" });
    try expectNext(&scanner, Token{ .number = "100" });
    try expectNext(&scanner, .object_end);
    try expectNext(&scanner, Token{ .string = "Animated" });
    try expectNext(&scanner, .false);
    try expectNext(&scanner, Token{ .string = "IDs" });
    try expectNext(&scanner, .array_begin);
    try expectNext(&scanner, Token{ .number = "116" });
    try expectNext(&scanner, Token{ .number = "943" });
    try expectNext(&scanner, Token{ .number = "234" });
    try expectNext(&scanner, Token{ .number = "38793" });
    try expectNext(&scanner, .array_end);
    try expectNext(&scanner, .object_end);
    try expectNext(&scanner, .object_end);
    try expectNext(&scanner, .end_of_document);
}
+
// One document exercising every JSON value type (strings with escapes,
// integer/float/signed-exponent numbers, literals, object, array).
const all_types_test_case =
    \\[
    \\  "", "a\nb",
    \\  0, 0.0, -1.1e-1,
    \\  true, false, null,
    \\  {"a": {}},
    \\  []
    \\]
;
+
// Checks the full token stream for `all_types_test_case`.
// When `large_buffer` is false the source is expected to emit partial_* tokens
// (one fragment per refill) followed by an empty terminating token.
fn testAllTypes(source: anytype, large_buffer: bool) !void {
    try expectPeekNext(source, .array_begin, .array_begin);
    try expectPeekNext(source, .string, Token{ .string = "" });
    try expectPeekNext(source, .string, Token{ .partial_string = "a" });
    try expectPeekNext(source, .string, Token{ .partial_string_escaped_1 = "\n".* });
    if (large_buffer) {
        try expectPeekNext(source, .string, Token{ .string = "b" });
    } else {
        try expectPeekNext(source, .string, Token{ .partial_string = "b" });
        try expectPeekNext(source, .string, Token{ .string = "" });
    }
    if (large_buffer) {
        try expectPeekNext(source, .number, Token{ .number = "0" });
    } else {
        try expectPeekNext(source, .number, Token{ .partial_number = "0" });
        try expectPeekNext(source, .number, Token{ .number = "" });
    }
    if (large_buffer) {
        try expectPeekNext(source, .number, Token{ .number = "0.0" });
    } else {
        try expectPeekNext(source, .number, Token{ .partial_number = "0" });
        try expectPeekNext(source, .number, Token{ .partial_number = "." });
        try expectPeekNext(source, .number, Token{ .partial_number = "0" });
        try expectPeekNext(source, .number, Token{ .number = "" });
    }
    if (large_buffer) {
        try expectPeekNext(source, .number, Token{ .number = "-1.1e-1" });
    } else {
        try expectPeekNext(source, .number, Token{ .partial_number = "-" });
        try expectPeekNext(source, .number, Token{ .partial_number = "1" });
        try expectPeekNext(source, .number, Token{ .partial_number = "." });
        try expectPeekNext(source, .number, Token{ .partial_number = "1" });
        try expectPeekNext(source, .number, Token{ .partial_number = "e" });
        try expectPeekNext(source, .number, Token{ .partial_number = "-" });
        try expectPeekNext(source, .number, Token{ .partial_number = "1" });
        try expectPeekNext(source, .number, Token{ .number = "" });
    }
    try expectPeekNext(source, .true, .true);
    try expectPeekNext(source, .false, .false);
    try expectPeekNext(source, .null, .null);
    try expectPeekNext(source, .object_begin, .object_begin);
    if (large_buffer) {
        try expectPeekNext(source, .string, Token{ .string = "a" });
    } else {
        try expectPeekNext(source, .string, Token{ .partial_string = "a" });
        try expectPeekNext(source, .string, Token{ .string = "" });
    }
    try expectPeekNext(source, .object_begin, .object_begin);
    try expectPeekNext(source, .object_end, .object_end);
    try expectPeekNext(source, .object_end, .object_end);
    try expectPeekNext(source, .array_begin, .array_begin);
    try expectPeekNext(source, .array_end, .array_end);
    try expectPeekNext(source, .array_end, .array_end);
    try expectPeekNext(source, .end_of_document, .end_of_document);
}
+
// Runs testAllTypes through three sources: a complete-input scanner, a
// default-buffer reader, and a 1-byte-buffer reader (forces partial tokens).
test "peek all types" {
    var scanner = JsonScanner.initCompleteInput(std.testing.allocator, all_types_test_case);
    defer scanner.deinit();
    try testAllTypes(&scanner, true);

    var stream = std.io.fixedBufferStream(all_types_test_case);
    var json_reader = jsonReader(std.testing.allocator, stream.reader());
    defer json_reader.deinit();
    try testAllTypes(&json_reader, true);

    var tiny_stream = std.io.fixedBufferStream(all_types_test_case);
    var tiny_json_reader = JsonReader(1, @TypeOf(tiny_stream.reader())).init(std.testing.allocator, tiny_stream.reader());
    defer tiny_json_reader.deinit();
    try testAllTypes(&tiny_json_reader, false);
}
+
// Closing an array with '}' must be a syntax error.
test "json.token mismatched close" {
    var scanner = JsonScanner.initCompleteInput(std.testing.allocator, "[102, 111, 111 }");
    defer scanner.deinit();
    try expectNext(&scanner, .array_begin);
    try expectNext(&scanner, Token{ .number = "102" });
    try expectNext(&scanner, Token{ .number = "111" });
    try expectNext(&scanner, Token{ .number = "111" });
    try std.testing.expectError(error.SyntaxError, scanner.next());
}
+
// An object key with no value before '}' must be a syntax error.
test "json.token premature object close" {
    var scanner = JsonScanner.initCompleteInput(std.testing.allocator, "{ \"key\": }");
    defer scanner.deinit();
    try expectNext(&scanner, .object_begin);
    try expectNext(&scanner, Token{ .string = "key" });
    try std.testing.expectError(error.SyntaxError, scanner.next());
}
+
// Smoke test: the scanner consumes the whole example document without error.
test "JsonScanner basic" {
    var scanner = JsonScanner.initCompleteInput(std.testing.allocator, example_document_str);
    defer scanner.deinit();

    while (true) {
        const token = try scanner.next();
        if (token == .end_of_document) break;
    }
}
+
// Smoke test: the streaming reader consumes the whole example document without error.
test "JsonReader basic" {
    var stream = std.io.fixedBufferStream(example_document_str);

    var json_reader = jsonReader(std.testing.allocator, stream.reader());
    defer json_reader.deinit();

    while (true) {
        const token = try json_reader.next();
        if (token == .end_of_document) break;
    }
}
+
// Stems for each syntactic part of a JSON number: sign, integer part,
// fraction, exponent.
const number_test_stems = .{
    .{ "", "-" },
    .{ "0", "1", "10", "9999999999999999999999999" },
    .{ "", ".0", ".999999999999999999999999" },
    .{ "", "e0", "E0", "e+0", "e-0", "e9999999999999999999999999999" },
};
// The comptime cartesian product of all stems: every combination of
// sign x integer x fraction x exponent becomes one test number.
const number_test_items = blk: {
    comptime var ret: []const []const u8 = &[_][]const u8{};
    for (number_test_stems[0]) |s0| {
        for (number_test_stems[1]) |s1| {
            for (number_test_stems[2]) |s2| {
                for (number_test_stems[3]) |s3| {
                    ret = ret ++ &[_][]const u8{s0 ++ s1 ++ s2 ++ s3};
                }
            }
        }
    }
    break :blk ret;
};
+
// Every generated number must scan as a single .number token whose slice is
// the entire input, followed by end_of_document.
test "numbers" {
    for (number_test_items) |number_str| {
        var scanner = JsonScanner.initCompleteInput(std.testing.allocator, number_str);
        defer scanner.deinit();

        const token = try scanner.next();
        const value = token.number; // assert this is a number
        try std.testing.expectEqualStrings(number_str, value);

        try std.testing.expectEqual(Token.end_of_document, try scanner.next());
    }
}
+
// Escape handling cases, including a UTF-16 surrogate pair (\uD834\uDD1E).
const string_test_cases = .{
    // The left is JSON without the "quotes".
    // The right is the expected unescaped content.
    .{ "", "" },
    .{ "\\\\", "\\" },
    .{ "a\\\\b", "a\\b" },
    .{ "a\\\"b", "a\"b" },
    .{ "\\n", "\n" },
    .{ "\\u000a", "\n" },
    .{ "𝄞", "\u{1D11E}" },
    .{ "\\uD834\\uDD1E", "\u{1D11E}" },
    .{ "\\uff20", "@" },
};
+
// Each escaped input must unescape to the expected bytes. The arena only
// holds the (possibly) allocated token value; the reader itself uses the
// testing allocator.
test "strings" {
    inline for (string_test_cases) |tuple| {
        var stream = std.io.fixedBufferStream("\"" ++ tuple[0] ++ "\"");
        var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
        defer arena.deinit();
        var json_reader = jsonReader(std.testing.allocator, stream.reader());
        defer json_reader.deinit();

        const token = try json_reader.nextAlloc(arena.allocator(), .alloc_if_needed);
        // .alloc_if_needed may return either variant depending on whether
        // unescaping required a copy.
        const value = switch (token) {
            .string => |value| value,
            .allocated_string => |value| value,
            else => return error.ExpectedString,
        };
        try std.testing.expectEqualStrings(tuple[1], value);

        try std.testing.expectEqual(Token.end_of_document, try json_reader.next());
    }
}
+
// Each case pairs the expected error (or null for success) with a document
// exercising deep/mismatched/unterminated nesting.
const nesting_test_cases = .{
    .{ null, "[]" },
    .{ null, "{}" },
    .{ error.SyntaxError, "[}" },
    .{ error.SyntaxError, "{]" },
    .{ null, "[" ** 1000 ++ "]" ** 1000 },
    .{ null, "{\"\":" ** 1000 ++ "0" ++ "}" ** 1000 },
    .{ error.SyntaxError, "[" ** 1000 ++ "]" ** 999 ++ "}" },
    .{ error.SyntaxError, "{\"\":" ** 1000 ++ "0" ++ "}" ** 999 ++ "]" },
    .{ error.SyntaxError, "[" ** 1000 ++ "]" ** 1001 },
    .{ error.SyntaxError, "{\"\":" ** 1000 ++ "0" ++ "}" ** 1001 },
    .{ error.UnexpectedEndOfInput, "[" ** 1000 ++ "]" ** 999 },
    .{ error.UnexpectedEndOfInput, "{\"\":" ** 1000 ++ "0" ++ "}" ** 999 },
};
+
// Runs every nesting case, printing the offending document on failure.
test "nesting" {
    inline for (nesting_test_cases) |tuple| {
        const maybe_error = tuple[0];
        const document_str = tuple[1];

        expectMaybeError(document_str, maybe_error) catch |err| {
            std.debug.print("in json document: {s}\n", .{document_str});
            return err;
        };
    }
}
+
// Scans `document_str` to completion. If `maybe_error` is non-null, the scan
// must fail with exactly that error; if null, the scan must succeed.
fn expectMaybeError(document_str: []const u8, maybe_error: ?Error) !void {
    var scanner = JsonScanner.initCompleteInput(std.testing.allocator, document_str);
    defer scanner.deinit();

    while (true) {
        const token = scanner.next() catch |err| {
            // Got an error: pass only if it is the expected one.
            if (maybe_error) |expected_err| {
                if (err == expected_err) return;
            }
            return err;
        };
        if (token == .end_of_document) break;
    }
    // Scanned cleanly, but an error was expected.
    if (maybe_error != null) return error.ExpectedError;
}
+
// Tokens are equal when their tags match and, for .number/.string payloads,
// their slices match byte-for-byte. Other payloads are compared by tag only.
fn expectEqualTokens(expected_token: Token, actual_token: Token) !void {
    try std.testing.expectEqual(std.meta.activeTag(expected_token), std.meta.activeTag(actual_token));
    switch (expected_token) {
        .number => |expected_value| {
            try std.testing.expectEqualStrings(expected_value, actual_token.number);
        },
        .string => |expected_value| {
            try std.testing.expectEqualStrings(expected_value, actual_token.string);
        },
        else => {},
    }
}
+
// Scans the same document with a 1-byte buffer and a 4KiB buffer and requires
// the resulting (fully-allocated) token streams to be identical.
fn testTinyBufferSize(document_str: []const u8) !void {
    var tiny_stream = std.io.fixedBufferStream(document_str);
    var normal_stream = std.io.fixedBufferStream(document_str);

    var tiny_json_reader = JsonReader(1, @TypeOf(tiny_stream.reader())).init(std.testing.allocator, tiny_stream.reader());
    defer tiny_json_reader.deinit();
    var normal_json_reader = JsonReader(0x1000, @TypeOf(normal_stream.reader())).init(std.testing.allocator, normal_stream.reader());
    defer normal_json_reader.deinit();

    expectEqualStreamOfTokens(&normal_json_reader, &tiny_json_reader) catch |err| {
        std.debug.print("in json document: {s}\n", .{document_str});
        return err;
    };
}
// Pulls tokens from both sources in lockstep (with .alloc_always so partial
// tokens are merged) and requires each pair to be equal. The arena is reset
// after each pair to keep memory bounded.
fn expectEqualStreamOfTokens(control_json_reader: anytype, test_json_reader: anytype) !void {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    while (true) {
        const control_token = try control_json_reader.nextAlloc(arena.allocator(), .alloc_always);
        const test_token = try test_json_reader.nextAlloc(arena.allocator(), .alloc_always);
        try expectEqualTokens(control_token, test_token);
        if (control_token == .end_of_document) break;
        _ = arena.reset(.retain_capacity);
    }
}
+
// A 1-byte buffer forces BufferUnderrun on nearly every token; results must
// still match a large-buffer scan for documents, numbers, and strings.
test "BufferUnderrun" {
    try testTinyBufferSize(example_document_str);
    for (number_test_items) |number_str| {
        try testTinyBufferSize(number_str);
    }
    inline for (string_test_cases) |tuple| {
        try testTinyBufferSize("\"" ++ tuple[0] ++ "\"");
    }
}
+
// validate() returns whether the document is well-formed; the false cases all
// have mismatched or misplaced brackets.
test "json.validate" {
    try std.testing.expectEqual(true, try validate(std.testing.allocator, "{}"));
    try std.testing.expectEqual(true, try validate(std.testing.allocator, "[]"));
    try std.testing.expectEqual(false, try validate(std.testing.allocator, "[{[[[[{}]]]]}]"));
    try std.testing.expectEqual(false, try validate(std.testing.allocator, "{]"));
    try std.testing.expectEqual(false, try validate(std.testing.allocator, "[}"));
    try std.testing.expectEqual(false, try validate(std.testing.allocator, "{{{{[]}}}]"));
}
+
// skipValue() on a document containing a single value must consume exactly
// that value, for both the scanner and the streaming reader.
fn testSkipValue(s: []const u8) !void {
    var scanner = JsonScanner.initCompleteInput(std.testing.allocator, s);
    defer scanner.deinit();
    try scanner.skipValue();
    try expectEqualTokens(.end_of_document, try scanner.next());

    var stream = std.io.fixedBufferStream(s);
    var json_reader = jsonReader(std.testing.allocator, stream.reader());
    defer json_reader.deinit();
    try json_reader.skipValue();
    try expectEqualTokens(.end_of_document, try json_reader.next());
}
+
// skipValue must handle every value type, deep nesting, and still surface
// syntax errors it encounters while skipping.
test "skipValue" {
    try testSkipValue("false");
    try testSkipValue("true");
    try testSkipValue("null");
    try testSkipValue("42");
    try testSkipValue("42.0");
    try testSkipValue("\"foo\"");
    try testSkipValue("[101, 111, 121]");
    try testSkipValue("{}");
    try testSkipValue("{\"foo\": \"bar\\nbaz\"}");

    // An absurd number of nestings
    const nestings = 1000;
    try testSkipValue("[" ** nestings ++ "]" ** nestings);

    // Would a number token cause problems in a deeply-nested array?
    try testSkipValue("[" ** nestings ++ "0.118, 999, 881.99, 911.9, 725, 3" ++ "]" ** nestings);

    // Mismatched brace/square bracket
    try std.testing.expectError(error.SyntaxError, testSkipValue("[102, 111, 111}"));
}
+
// The failing allocator permits only one allocation. Without pre-reserving
// stack capacity, scanning 999 nesting levels needs further allocations and
// fails; with ensureTotalStackCapacity it succeeds.
fn testEnsureStackCapacity(do_ensure: bool) !void {
    var fail_alloc = std.testing.FailingAllocator.init(std.testing.allocator, 1);
    const failing_allocator = fail_alloc.allocator();

    const nestings = 999; // intentionally not a power of 2.
    var scanner = JsonScanner.initCompleteInput(failing_allocator, "[" ** nestings ++ "]" ** nestings);
    defer scanner.deinit();

    if (do_ensure) {
        try scanner.ensureTotalStackCapacity(nestings);
    }

    try scanner.skipValue();
    try std.testing.expectEqual(Token.end_of_document, try scanner.next());
}
test "ensureTotalStackCapacity" {
    // Once to demonstrate failure.
    try std.testing.expectError(error.OutOfMemory, testEnsureStackCapacity(false));
    // Then to demonstrate it works.
    try testEnsureStackCapacity(true);
}
+
// Skips one value (expecting `expected_error` if non-null) and then checks
// the diagnostics' final line/column/byte-offset position.
fn testDiagnosticsFromSource(expected_error: ?anyerror, line: u64, col: u64, byte_offset: u64, source: anytype) !void {
    var diagnostics = Diagnostics{};
    source.enableDiagnostics(&diagnostics);

    if (expected_error) |expected_err| {
        try std.testing.expectError(expected_err, source.skipValue());
    } else {
        try source.skipValue();
        try std.testing.expectEqual(Token.end_of_document, try source.next());
    }
    try std.testing.expectEqual(line, diagnostics.getLine());
    try std.testing.expectEqual(col, diagnostics.getColumn());
    try std.testing.expectEqual(byte_offset, diagnostics.getByteOffset());
}
// Diagnostics must report the same position regardless of source kind or
// buffer size (complete input, 1-byte reader, 5-byte reader).
fn testDiagnostics(expected_error: ?anyerror, line: u64, col: u64, byte_offset: u64, s: []const u8) !void {
    var scanner = JsonScanner.initCompleteInput(std.testing.allocator, s);
    defer scanner.deinit();
    try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &scanner);

    var tiny_stream = std.io.fixedBufferStream(s);
    var tiny_json_reader = JsonReader(1, @TypeOf(tiny_stream.reader())).init(std.testing.allocator, tiny_stream.reader());
    defer tiny_json_reader.deinit();
    try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &tiny_json_reader);

    var medium_stream = std.io.fixedBufferStream(s);
    var medium_json_reader = JsonReader(5, @TypeOf(medium_stream.reader())).init(std.testing.allocator, medium_stream.reader());
    defer medium_json_reader.deinit();
    try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &medium_json_reader);
}
// Positions are 1-based line/column; byte offset is 0-based.
test "enableDiagnostics" {
    try testDiagnostics(error.UnexpectedEndOfInput, 1, 1, 0, "");
    try testDiagnostics(null, 1, 3, 2, "[]");
    try testDiagnostics(null, 2, 2, 3, "[\n]");
    try testDiagnostics(null, 14, 2, example_document_str.len, example_document_str);

    // A trailing comma before '}' is the classic "common mistake".
    try testDiagnostics(error.SyntaxError, 3, 1, 25,
        \\{
        \\  "common": "mistake",
        \\}
    );

    inline for ([_]comptime_int{ 5, 6, 7, 99 }) |reps| {
        // The error happens 1 byte before the end.
        const s = "[" ** reps ++ "}";
        try testDiagnostics(error.SyntaxError, 1, s.len, s.len - 1, s);
    }
}
lib/std/json/static.zig
@@ -0,0 +1,621 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const ArrayList = std.ArrayList;
+
+const Scanner = @import("./scanner.zig").Scanner;
+const Token = @import("./scanner.zig").Token;
+const AllocWhen = @import("./scanner.zig").AllocWhen;
+const default_max_value_len = @import("./scanner.zig").default_max_value_len;
+const isNumberFormattedLikeAnInteger = @import("./scanner.zig").isNumberFormattedLikeAnInteger;
+
/// Options controlling how parseFromSlice()/parseFromTokenSource() map JSON onto T.
pub const ParseOptions = struct {
    /// Behaviour when a duplicate field is encountered.
    duplicate_field_behavior: enum {
        use_first,
        @"error",
        use_last,
    } = .@"error",

    /// If false, finding an unknown field returns an error.
    ignore_unknown_fields: bool = false,

    /// Passed to json.Scanner.nextAllocMax() or json.Reader.nextAllocMax().
    /// The default for parseFromSlice() or parseFromTokenSource() with a *json.Scanner input
    /// is the length of the input slice, which means error.ValueTooLong will never be returned.
    /// The default for parseFromTokenSource() with a *json.Reader is default_max_value_len.
    max_value_len: ?usize = null,
};
+
/// Parses the json document from s and returns the result.
/// The provided allocator is used both for temporary allocations during parsing the document,
/// and also to allocate any pointer values in the return type.
/// If T contains any pointers, free the memory with `std.json.parseFree`.
/// Note that `error.BufferUnderrun` is not actually possible to return from this function.
pub fn parseFromSlice(comptime T: type, allocator: Allocator, s: []const u8, options: ParseOptions) ParseError(T, Scanner)!T {
    // Delegate to the token-source path with a complete-input scanner.
    var scanner = Scanner.initCompleteInput(allocator, s);
    defer scanner.deinit();

    return parseFromTokenSource(T, allocator, &scanner, options);
}
+
/// `scanner_or_reader` must be either a `*std.json.Scanner` with complete input or a `*std.json.Reader`.
/// allocator is used to allocate the data of T if necessary,
/// such as if T is `*u32` or `[]u32`.
/// If T contains any pointers, free the memory with `std.json.parseFree`.
/// If T contains no pointers, the allocator may sometimes be used for temporary allocations,
/// but no call to `std.json.parseFree` will be necessary;
/// all temporary allocations will be freed before this function returns.
/// Note that `error.BufferUnderrun` is not actually possible to return from this function.
pub fn parseFromTokenSource(comptime T: type, allocator: Allocator, scanner_or_reader: anytype, options: ParseOptions) ParseError(T, @TypeOf(scanner_or_reader.*))!T {
    // A Scanner must have its complete input; streaming would surface BufferUnderrun.
    if (@TypeOf(scanner_or_reader.*) == Scanner) {
        assert(scanner_or_reader.is_end_of_input);
    }

    // Resolve the documented default for max_value_len (see ParseOptions).
    var resolved_options = options;
    if (resolved_options.max_value_len == null) {
        if (@TypeOf(scanner_or_reader.*) == Scanner) {
            resolved_options.max_value_len = scanner_or_reader.input.len;
        } else {
            resolved_options.max_value_len = default_max_value_len;
        }
    }

    const r = try parseInternal(T, allocator, scanner_or_reader, resolved_options);
    errdefer parseFree(T, allocator, r);

    // The document must contain exactly one value; anything after it is an error.
    assert(.end_of_document == try scanner_or_reader.next());

    return r;
}
+
/// The error set that will be returned from parsing T from *Source.
/// Note that this may contain error.BufferUnderrun, but that error will never actually be returned.
pub fn ParseError(comptime T: type, comptime Source: type) type {
    // `inferred_types` is used to avoid infinite recursion for recursive type definitions.
    const inferred_types = [_]type{};
    // A few of these will either always be present or present enough of the time that
    // omitting them is more confusing than always including them.
    return error{UnexpectedToken} || Source.NextError || Source.PeekError ||
        ParseInternalErrorImpl(T, Source, &inferred_types);
}
+
/// Computes the error set parseInternal() can produce for `T` read from `Source`.
/// `inferred_types` carries the chain of types already being resolved so that
/// recursive type definitions terminate with an empty set.
fn ParseInternalErrorImpl(comptime T: type, comptime Source: type, comptime inferred_types: []const type) type {
    // Recursion guard: a type already on the chain contributes nothing new.
    for (inferred_types) |ty| {
        if (T == ty) return error{};
    }

    switch (@typeInfo(T)) {
        .Bool => return error{},
        .Float, .ComptimeFloat => return Source.AllocError || std.fmt.ParseFloatError,
        .Int, .ComptimeInt => {
            return Source.AllocError || error{ InvalidNumber, Overflow } ||
                std.fmt.ParseIntError || std.fmt.ParseFloatError;
        },
        .Optional => |optional_info| return ParseInternalErrorImpl(optional_info.child, Source, inferred_types ++ [_]type{T}),
        .Enum => return Source.AllocError || error{InvalidEnumTag},
        .Union => |unionInfo| {
            if (unionInfo.tag_type) |_| {
                var errors = Source.AllocError || error{UnknownField};
                for (unionInfo.fields) |u_field| {
                    errors = errors || ParseInternalErrorImpl(u_field.type, Source, inferred_types ++ [_]type{T});
                }
                return errors;
            } else {
                @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");
            }
        },
        .Struct => |structInfo| {
            // Was `Scanner.AllocError`; use the Source's own error set like
            // every other branch, so Reader sources get the right set.
            var errors = Source.AllocError || error{
                DuplicateField,
                UnknownField,
                MissingField,
            };
            for (structInfo.fields) |field| {
                errors = errors || ParseInternalErrorImpl(field.type, Source, inferred_types ++ [_]type{T});
            }
            return errors;
        },
        .Array => |arrayInfo| {
            return error{LengthMismatch} ||
                ParseInternalErrorImpl(arrayInfo.child, Source, inferred_types ++ [_]type{T});
        },
        .Vector => |vecInfo| {
            return error{LengthMismatch} ||
                ParseInternalErrorImpl(vecInfo.child, Source, inferred_types ++ [_]type{T});
        },
        .Pointer => |ptrInfo| {
            switch (ptrInfo.size) {
                .One, .Slice => {
                    return ParseInternalErrorImpl(ptrInfo.child, Source, inferred_types ++ [_]type{T});
                },
                else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
            }
        },
        else => return error{},
    }
    unreachable;
}
+
/// Recursively parses one JSON value from `source` into a value of type `T`.
/// `options.max_value_len` must already be resolved (non-null) by the caller.
/// On the error path, everything this call allocated is freed before returning.
fn parseInternal(
    comptime T: type,
    allocator: Allocator,
    source: anytype,
    options: ParseOptions,
) ParseError(T, @TypeOf(source.*))!T {
    switch (@typeInfo(T)) {
        .Bool => {
            return switch (try source.next()) {
                .true => true,
                .false => false,
                else => error.UnexpectedToken,
            };
        },
        .Float, .ComptimeFloat => {
            const token = try source.nextAllocMax(allocator, .alloc_if_needed, options.max_value_len.?);
            defer freeAllocated(allocator, token);
            // Both bare numbers and quoted strings are accepted as float input.
            const slice = switch (token) {
                .number, .string => |slice| slice,
                .allocated_number, .allocated_string => |slice| slice,
                else => return error.UnexpectedToken,
            };
            return try std.fmt.parseFloat(T, slice);
        },
        .Int, .ComptimeInt => {
            const token = try source.nextAllocMax(allocator, .alloc_if_needed, options.max_value_len.?);
            defer freeAllocated(allocator, token);
            const slice = switch (token) {
                .number, .string => |slice| slice,
                .allocated_number, .allocated_string => |slice| slice,
                else => return error.UnexpectedToken,
            };
            if (isNumberFormattedLikeAnInteger(slice))
                return std.fmt.parseInt(T, slice, 10);
            // Try to coerce a float to an integer.
            const float = try std.fmt.parseFloat(f128, slice);
            if (@round(float) != float) return error.InvalidNumber;
            if (float > std.math.maxInt(T) or float < std.math.minInt(T)) return error.Overflow;
            return @floatToInt(T, float);
        },
        .Optional => |optionalInfo| {
            switch (try source.peekNextTokenType()) {
                .null => {
                    // Consume the null token we just peeked.
                    _ = try source.next();
                    return null;
                },
                else => {
                    return try parseInternal(optionalInfo.child, allocator, source, options);
                },
            }
        },
        .Enum => |enumInfo| {
            const token = try source.nextAllocMax(allocator, .alloc_if_needed, options.max_value_len.?);
            defer freeAllocated(allocator, token);
            const slice = switch (token) {
                .number, .string => |slice| slice,
                .allocated_number, .allocated_string => |slice| slice,
                else => return error.UnexpectedToken,
            };
            // Check for a named value.
            if (std.meta.stringToEnum(T, slice)) |value| return value;
            // Check for a numeric value.
            if (!isNumberFormattedLikeAnInteger(slice)) return error.InvalidEnumTag;
            const n = std.fmt.parseInt(enumInfo.tag_type, slice, 10) catch return error.InvalidEnumTag;
            return try std.meta.intToEnum(T, n);
        },
        .Union => |unionInfo| {
            const UnionTagType = unionInfo.tag_type orelse @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");

            // A union is encoded as a single-field object: {"tag": value}.
            if (.object_begin != try source.next()) return error.UnexpectedToken;

            var result: ?T = null;
            errdefer {
                // On error, free whatever payload was already parsed.
                if (result) |r| {
                    inline for (unionInfo.fields) |u_field| {
                        if (r == @field(UnionTagType, u_field.name)) {
                            parseFree(u_field.type, allocator, @field(r, u_field.name));
                        }
                    }
                }
            }

            var name_token: ?Token = try source.nextAllocMax(allocator, .alloc_if_needed, options.max_value_len.?);
            errdefer {
                if (name_token) |t| {
                    freeAllocated(allocator, t);
                }
            }
            const field_name = switch (name_token.?) {
                .string => |slice| slice,
                .allocated_string => |slice| slice,
                else => return error.UnexpectedToken,
            };

            inline for (unionInfo.fields) |u_field| {
                if (std.mem.eql(u8, u_field.name, field_name)) {
                    // Free the name token now in case we're using an allocator that optimizes freeing the last allocated object.
                    // (Recursing into parseInternal() might trigger more allocations.)
                    freeAllocated(allocator, name_token.?);
                    name_token = null;

                    if (u_field.type == void) {
                        // void isn't really a json type, but we can support void payload union tags with {} as a value.
                        if (.object_begin != try source.next()) return error.UnexpectedToken;
                        if (.object_end != try source.next()) return error.UnexpectedToken;
                        result = @unionInit(T, u_field.name, {});
                    } else {
                        // Recurse.
                        result = @unionInit(T, u_field.name, try parseInternal(u_field.type, allocator, source, options));
                    }
                    break;
                }
            } else {
                // Didn't match anything.
                return error.UnknownField;
            }

            // Exactly one field is allowed in the wrapping object.
            if (.object_end != try source.next()) return error.UnexpectedToken;

            return result.?;
        },

        .Struct => |structInfo| {
            // Tuples are encoded as fixed-length JSON arrays.
            if (structInfo.is_tuple) {
                if (.array_begin != try source.next()) return error.UnexpectedToken;

                var r: T = undefined;
                var fields_seen: usize = 0;
                errdefer {
                    // Free only the elements that were successfully parsed.
                    inline for (0..structInfo.fields.len) |i| {
                        if (i < fields_seen) {
                            parseFree(structInfo.fields[i].type, allocator, r[i]);
                        }
                    }
                }
                inline for (0..structInfo.fields.len) |i| {
                    r[i] = try parseInternal(structInfo.fields[i].type, allocator, source, options);
                    fields_seen = i + 1;
                }

                if (.array_end != try source.next()) return error.UnexpectedToken;

                return r;
            }

            if (.object_begin != try source.next()) return error.UnexpectedToken;

            var r: T = undefined;
            var fields_seen = [_]bool{false} ** structInfo.fields.len;
            errdefer {
                // Free only the fields that were successfully parsed.
                inline for (structInfo.fields, 0..) |field, i| {
                    if (fields_seen[i]) {
                        parseFree(field.type, allocator, @field(r, field.name));
                    }
                }
            }

            while (true) {
                var name_token: ?Token = try source.nextAllocMax(allocator, .alloc_if_needed, options.max_value_len.?);
                errdefer {
                    if (name_token) |t| {
                        freeAllocated(allocator, t);
                    }
                }
                const field_name = switch (name_token.?) {
                    .object_end => break, // No more fields.
                    .string => |slice| slice,
                    .allocated_string => |slice| slice,
                    else => return error.UnexpectedToken,
                };

                inline for (structInfo.fields, 0..) |field, i| {
                    if (field.is_comptime) @compileError("comptime fields are not supported: " ++ @typeName(T) ++ "." ++ field.name);
                    if (std.mem.eql(u8, field.name, field_name)) {
                        // Free the name token now in case we're using an allocator that optimizes freeing the last allocated object.
                        // (Recursing into parseInternal() might trigger more allocations.)
                        freeAllocated(allocator, name_token.?);
                        name_token = null;

                        if (fields_seen[i]) {
                            switch (options.duplicate_field_behavior) {
                                .use_first => {
                                    // Parse and then delete the redundant value.
                                    // We don't want to skip the value, because we want type checking.
                                    const ignored_value = try parseInternal(field.type, allocator, source, options);
                                    parseFree(field.type, allocator, ignored_value);
                                    break;
                                },
                                .@"error" => return error.DuplicateField,
                                .use_last => {
                                    // Delete the stale value. We're about to get a new one.
                                    parseFree(field.type, allocator, @field(r, field.name));
                                    fields_seen[i] = false;
                                },
                            }
                        }
                        @field(r, field.name) = try parseInternal(field.type, allocator, source, options);
                        fields_seen[i] = true;
                        break;
                    }
                } else {
                    // Didn't match anything.
                    freeAllocated(allocator, name_token.?);
                    if (options.ignore_unknown_fields) {
                        try source.skipValue();
                    } else {
                        return error.UnknownField;
                    }
                }
            }
            // Any field not present in the document must have a default value.
            inline for (structInfo.fields, 0..) |field, i| {
                if (!fields_seen[i]) {
                    if (field.default_value) |default_ptr| {
                        const default = @ptrCast(*align(1) const field.type, default_ptr).*;
                        @field(r, field.name) = default;
                    } else {
                        return error.MissingField;
                    }
                }
            }
            return r;
        },

        .Array => |arrayInfo| {
            switch (try source.peekNextTokenType()) {
                .array_begin => {
                    // Typical array.
                    return parseInternalArray(T, arrayInfo.child, arrayInfo.len, allocator, source, options);
                },
                .string => {
                    if (arrayInfo.child != u8) return error.UnexpectedToken;
                    // Fixed-length string.

                    // Accumulate partial tokens until the final .string token;
                    // the total length must exactly fill the array.
                    var r: T = undefined;
                    var i: usize = 0;
                    while (true) {
                        switch (try source.next()) {
                            .string => |slice| {
                                if (i + slice.len != r.len) return error.LengthMismatch;
                                @memcpy(r[i..][0..slice.len], slice);
                                break;
                            },
                            .partial_string => |slice| {
                                if (i + slice.len > r.len) return error.LengthMismatch;
                                @memcpy(r[i..][0..slice.len], slice);
                                i += slice.len;
                            },
                            .partial_string_escaped_1 => |arr| {
                                if (i + arr.len > r.len) return error.LengthMismatch;
                                @memcpy(r[i..][0..arr.len], arr[0..]);
                                i += arr.len;
                            },
                            .partial_string_escaped_2 => |arr| {
                                if (i + arr.len > r.len) return error.LengthMismatch;
                                @memcpy(r[i..][0..arr.len], arr[0..]);
                                i += arr.len;
                            },
                            .partial_string_escaped_3 => |arr| {
                                if (i + arr.len > r.len) return error.LengthMismatch;
                                @memcpy(r[i..][0..arr.len], arr[0..]);
                                i += arr.len;
                            },
                            .partial_string_escaped_4 => |arr| {
                                if (i + arr.len > r.len) return error.LengthMismatch;
                                @memcpy(r[i..][0..arr.len], arr[0..]);
                                i += arr.len;
                            },
                            else => unreachable,
                        }
                    }

                    return r;
                },

                else => return error.UnexpectedToken,
            }
        },

        .Vector => |vecInfo| {
            switch (try source.peekNextTokenType()) {
                .array_begin => {
                    return parseInternalArray(T, vecInfo.child, vecInfo.len, allocator, source, options);
                },
                else => return error.UnexpectedToken,
            }
        },

        .Pointer => |ptrInfo| {
            switch (ptrInfo.size) {
                .One => {
                    // Allocate the pointee, then parse into it.
                    const r: *ptrInfo.child = try allocator.create(ptrInfo.child);
                    errdefer allocator.destroy(r);
                    r.* = try parseInternal(ptrInfo.child, allocator, source, options);
                    return r;
                },
                .Slice => {
                    switch (try source.peekNextTokenType()) {
                        .array_begin => {
                            _ = try source.next();

                            // Typical array.
                            var arraylist = ArrayList(ptrInfo.child).init(allocator);
                            errdefer {
                                // Free parsed elements before freeing the list itself.
                                while (arraylist.popOrNull()) |v| {
                                    parseFree(ptrInfo.child, allocator, v);
                                }
                                arraylist.deinit();
                            }

                            while (true) {
                                switch (try source.peekNextTokenType()) {
                                    .array_end => {
                                        _ = try source.next();
                                        break;
                                    },
                                    else => {},
                                }

                                try arraylist.ensureUnusedCapacity(1);
                                arraylist.appendAssumeCapacity(try parseInternal(ptrInfo.child, allocator, source, options));
                            }

                            if (ptrInfo.sentinel) |some| {
                                const sentinel_value = @ptrCast(*align(1) const ptrInfo.child, some).*;
                                return try arraylist.toOwnedSliceSentinel(sentinel_value);
                            }

                            return try arraylist.toOwnedSlice();
                        },
                        .string => {
                            if (ptrInfo.child != u8) return error.UnexpectedToken;

                            // Dynamic length string.
                            if (ptrInfo.sentinel) |sentinel_ptr| {
                                // Use our own array list so we can append the sentinel.
                                var value_list = ArrayList(u8).init(allocator);
                                errdefer value_list.deinit();
                                _ = try source.allocNextIntoArrayList(&value_list, .alloc_always);
                                return try value_list.toOwnedSliceSentinel(@ptrCast(*const u8, sentinel_ptr).*);
                            }
                            switch (try source.nextAllocMax(allocator, .alloc_always, options.max_value_len.?)) {
                                .allocated_string => |slice| return slice,
                                else => unreachable,
                            }
                        },
                        else => return error.UnexpectedToken,
                    }
                },
                else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
            }
        },
        else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
    }
    unreachable;
}
+
+/// Parses a JSON array into a fixed-length sequence type `T` (array or
+/// vector) with element type `Child` and comptime-known length `len`.
+/// The caller must have already peeked `.array_begin` on `source`.
+/// Rejects inputs whose element count differs from `len`.
+fn parseInternalArray(
+    comptime T: type,
+    comptime Child: type,
+    comptime len: comptime_int,
+    allocator: Allocator,
+    source: anytype,
+    options: ParseOptions,
+) !T {
+    assert(.array_begin == try source.next());
+
+    var r: T = undefined;
+    var i: usize = 0;
+    errdefer {
+        // Without the len check `r[i]` is not allowed
+        // NOTE(review): when the error comes from `parseInternal` below, `i`
+        // is the index of the element that failed and `r[i]` was never
+        // initialized, yet it is freed first here; and when the `.array_end`
+        // check below fails, `i == len`, so `r[i]` indexes past the array.
+        // Verify both error paths (only elements 0..i-1 are initialized).
+        if (len > 0) while (true) : (i -= 1) {
+            parseFree(Child, allocator, r[i]);
+            if (i == 0) break;
+        };
+    }
+    while (i < len) : (i += 1) {
+        r[i] = try parseInternal(Child, allocator, source, options);
+    }
+
+    // Exactly `len` elements must be followed by the closing bracket.
+    if (.array_end != try source.next()) return error.UnexpectedToken;
+
+    return r;
+}
+
+/// Frees the payload of a scanner token that was heap-allocated
+/// (`.allocated_number` / `.allocated_string`). All other token
+/// variants borrow from the input buffer and are a no-op here.
+fn freeAllocated(allocator: Allocator, token: Token) void {
+    switch (token) {
+        .allocated_number, .allocated_string => |slice| {
+            allocator.free(slice);
+        },
+        else => {},
+    }
+}
+
+/// Releases resources created by parseFromSlice() or parseFromTokenSource().
+/// Recursively walks `value` by type and frees every allocation the parser
+/// could have made for it, taking care not to free pointers that still refer
+/// to struct-field default values (which may be global constants).
+pub fn parseFree(comptime T: type, allocator: Allocator, value: T) void {
+    switch (@typeInfo(T)) {
+        // Scalar types own no heap memory; nothing to do.
+        .Bool, .Float, .ComptimeFloat, .Int, .ComptimeInt, .Enum => {},
+        .Optional => {
+            if (value) |v| {
+                return parseFree(@TypeOf(v), allocator, v);
+            }
+        },
+        .Union => |unionInfo| {
+            if (unionInfo.tag_type) |UnionTagType| {
+                // Free only the payload of the currently active tag.
+                inline for (unionInfo.fields) |u_field| {
+                    if (value == @field(UnionTagType, u_field.name)) {
+                        parseFree(u_field.type, allocator, @field(value, u_field.name));
+                        break;
+                    }
+                }
+            } else {
+                // Untagged unions are rejected at parse time, so one can
+                // never reach parseFree.
+                unreachable;
+            }
+        },
+        .Struct => |structInfo| {
+            inline for (structInfo.fields) |field| {
+                var should_free = true;
+                if (field.default_value) |default| {
+                    switch (@typeInfo(field.type)) {
+                        // We must not attempt to free pointers to struct default values
+                        .Pointer => |fieldPtrInfo| {
+                            const field_value = @field(value, field.name);
+                            const field_ptr = switch (fieldPtrInfo.size) {
+                                .One => field_value,
+                                .Slice => field_value.ptr,
+                                else => unreachable, // Other pointer types are not parseable
+                            };
+                            const field_addr = @ptrToInt(field_ptr);
+
+                            const casted_default = @ptrCast(*const field.type, @alignCast(@alignOf(field.type), default)).*;
+                            const default_ptr = switch (fieldPtrInfo.size) {
+                                .One => casted_default,
+                                .Slice => casted_default.ptr,
+                                else => unreachable, // Other pointer types are not parseable
+                            };
+                            const default_addr = @ptrToInt(default_ptr);
+
+                            // Matching addresses mean the parser filled in the
+                            // default (a possibly-global constant): do not free.
+                            if (field_addr == default_addr) {
+                                should_free = false;
+                            }
+                        },
+                        else => {},
+                    }
+                }
+                if (should_free) {
+                    parseFree(field.type, allocator, @field(value, field.name));
+                }
+            }
+        },
+        .Array => |arrayInfo| {
+            for (value) |v| {
+                parseFree(arrayInfo.child, allocator, v);
+            }
+        },
+        .Vector => |vecInfo| {
+            var i: usize = 0;
+            while (i < vecInfo.len) : (i += 1) {
+                parseFree(vecInfo.child, allocator, value[i]);
+            }
+        },
+        .Pointer => |ptrInfo| {
+            switch (ptrInfo.size) {
+                .One => {
+                    // Free the pointee first, then the pointer itself.
+                    parseFree(ptrInfo.child, allocator, value.*);
+                    allocator.destroy(value);
+                },
+                .Slice => {
+                    for (value) |v| {
+                        parseFree(ptrInfo.child, allocator, v);
+                    }
+                    allocator.free(value);
+                },
+                else => unreachable,
+            }
+        },
+        else => unreachable,
+    }
+}
+
+// Pull the companion test file into this compilation unit so its
+// tests run with `zig build test`.
+test {
+    _ = @import("./static_test.zig");
+}
lib/std/json/static_test.zig
@@ -0,0 +1,437 @@
+const std = @import("std");
+const testing = std.testing;
+
+const parseFromSlice = @import("./static.zig").parseFromSlice;
+const parseFromTokenSource = @import("./static.zig").parseFromTokenSource;
+const parseFree = @import("./static.zig").parseFree;
+const ParseOptions = @import("./static.zig").ParseOptions;
+const JsonScanner = @import("./scanner.zig").Scanner;
+const jsonReader = @import("./scanner.zig").reader;
+
+// Coverage for scalar primitives, fixed-size arrays, numbers wrapped in
+// strings, allocated slices, and sentinel-terminated slices.
+test "parse" {
+    try testing.expectEqual(false, try parseFromSlice(bool, testing.allocator, "false", .{}));
+    try testing.expectEqual(true, try parseFromSlice(bool, testing.allocator, "true", .{}));
+    try testing.expectEqual(@as(u1, 1), try parseFromSlice(u1, testing.allocator, "1", .{}));
+    try testing.expectError(error.Overflow, parseFromSlice(u1, testing.allocator, "50", .{}));
+    try testing.expectEqual(@as(u64, 42), try parseFromSlice(u64, testing.allocator, "42", .{}));
+    try testing.expectEqual(@as(f64, 42), try parseFromSlice(f64, testing.allocator, "42.0", .{}));
+    try testing.expectEqual(@as(?bool, null), try parseFromSlice(?bool, testing.allocator, "null", .{}));
+    try testing.expectEqual(@as(?bool, true), try parseFromSlice(?bool, testing.allocator, "true", .{}));
+
+    // Fixed-length [N]u8 accepts either a JSON string or an array of bytes.
+    try testing.expectEqual(@as([3]u8, "foo".*), try parseFromSlice([3]u8, testing.allocator, "\"foo\"", .{}));
+    try testing.expectEqual(@as([3]u8, "foo".*), try parseFromSlice([3]u8, testing.allocator, "[102, 111, 111]", .{}));
+    try testing.expectEqual(@as([0]u8, undefined), try parseFromSlice([0]u8, testing.allocator, "[]", .{}));
+
+    // Number tokens wrapped in JSON strings are also accepted.
+    try testing.expectEqual(@as(u64, 12345678901234567890), try parseFromSlice(u64, testing.allocator, "\"12345678901234567890\"", .{}));
+    try testing.expectEqual(@as(f64, 123.456), try parseFromSlice(f64, testing.allocator, "\"123.456\"", .{}));
+}
+
+// Enums parse from either their tag name (string) or their integer value.
+test "parse into enum" {
+    const T = enum(u32) {
+        Foo = 42,
+        Bar,
+        @"with\\escape",
+    };
+    try testing.expectEqual(@as(T, .Foo), try parseFromSlice(T, testing.allocator, "\"Foo\"", .{}));
+    try testing.expectEqual(@as(T, .Foo), try parseFromSlice(T, testing.allocator, "42", .{}));
+    try testing.expectEqual(@as(T, .@"with\\escape"), try parseFromSlice(T, testing.allocator, "\"with\\\\escape\"", .{}));
+    try testing.expectError(error.InvalidEnumTag, parseFromSlice(T, testing.allocator, "5", .{}));
+    try testing.expectError(error.InvalidEnumTag, parseFromSlice(T, testing.allocator, "\"Qux\"", .{}));
+}
+
+// Slices allocate; each case frees via parseFree so testing.allocator
+// catches leaks.
+test "parse into that allocates a slice" {
+    {
+        // string as string
+        const r = try parseFromSlice([]u8, testing.allocator, "\"foo\"", .{});
+        defer parseFree([]u8, testing.allocator, r);
+        try testing.expectEqualSlices(u8, "foo", r);
+    }
+    {
+        // string as array of u8 integers
+        const r = try parseFromSlice([]u8, testing.allocator, "[102, 111, 111]", .{});
+        defer parseFree([]u8, testing.allocator, r);
+        try testing.expectEqualSlices(u8, "foo", r);
+    }
+    {
+        const r = try parseFromSlice([]u8, testing.allocator, "\"with\\\\escape\"", .{});
+        defer parseFree([]u8, testing.allocator, r);
+        try testing.expectEqualSlices(u8, "with\\escape", r);
+    }
+}
+
+test "parse into sentinel slice" {
+    const result = try parseFromSlice([:0]const u8, testing.allocator, "\"\\n\"", .{});
+    defer parseFree([:0]const u8, testing.allocator, result);
+    try testing.expect(std.mem.eql(u8, result, "\n"));
+}
+
+// A tagged union parses from a single-field object {"tag": payload}.
+test "parse into tagged union" {
+    const T = union(enum) {
+        nothing,
+        int: i32,
+        float: f64,
+        string: []const u8,
+    };
+    try testing.expectEqual(T{ .float = 1.5 }, try parseFromSlice(T, testing.allocator, "{\"float\":1.5}", .{}));
+    try testing.expectEqual(T{ .int = 1 }, try parseFromSlice(T, testing.allocator, "{\"int\":1}", .{}));
+    try testing.expectEqual(T{ .nothing = {} }, try parseFromSlice(T, testing.allocator, "{\"nothing\":{}}", .{}));
+}
+
+// Malformed or ambiguous union objects must be rejected, not guessed at.
+test "parse into tagged union errors" {
+    const T = union(enum) {
+        nothing,
+        int: i32,
+        float: f64,
+        string: []const u8,
+    };
+    try testing.expectError(error.UnexpectedToken, parseFromSlice(T, testing.allocator, "42", .{}));
+    try testing.expectError(error.UnexpectedToken, parseFromSlice(T, testing.allocator, "{}", .{}));
+    try testing.expectError(error.UnknownField, parseFromSlice(T, testing.allocator, "{\"bogus\":1}", .{}));
+    try testing.expectError(error.UnexpectedToken, parseFromSlice(T, testing.allocator, "{\"int\":1, \"int\":1", .{}));
+    try testing.expectError(error.UnexpectedToken, parseFromSlice(T, testing.allocator, "{\"int\":1, \"float\":1.0}", .{}));
+    try testing.expectError(error.UnexpectedToken, parseFromSlice(T, testing.allocator, "{\"nothing\":null}", .{}));
+    try testing.expectError(error.UnexpectedToken, parseFromSlice(T, testing.allocator, "{\"nothing\":{\"no\":0}}", .{}));
+
+    // Allocator failure
+    var fail_alloc = testing.FailingAllocator.init(testing.allocator, 0);
+    const failing_allocator = fail_alloc.allocator();
+    try testing.expectError(error.OutOfMemory, parseFromSlice(T, failing_allocator, "{\"string\"\"foo\"}", .{}));
+}
+
+test "parseFree descends into tagged union" {
+    const T = union(enum) {
+        nothing,
+        int: i32,
+        float: f64,
+        string: []const u8,
+    };
+    const r = try parseFromSlice(T, testing.allocator, "{\"string\":\"foo\"}", .{});
+    try testing.expectEqualSlices(u8, "foo", r.string);
+    // testing.allocator reports a leak if parseFree misses the payload.
+    parseFree(T, testing.allocator, r);
+}
+
+test "parse into struct with no fields" {
+    const T = struct {};
+    try testing.expectEqual(T{}, try parseFromSlice(T, testing.allocator, "{}", .{}));
+}
+
+// Globals referenced by struct-field defaults below; parseFree must never
+// attempt to free these.
+const test_const_value: usize = 123;
+
+test "parse into struct with default const pointer field" {
+    const T = struct { a: *const usize = &test_const_value };
+    try testing.expectEqual(T{}, try parseFromSlice(T, testing.allocator, "{}", .{}));
+}
+
+const test_default_usize: usize = 123;
+const test_default_usize_ptr: *align(1) const usize = &test_default_usize;
+const test_default_str: []const u8 = "test str";
+const test_default_str_slice: [2][]const u8 = [_][]const u8{
+    "test1",
+    "test2",
+};
+
+test "freeing parsed structs with pointers to default values" {
+    const T = struct {
+        int: *const usize = &test_default_usize,
+        int_ptr: *allowzero align(1) const usize = test_default_usize_ptr,
+        str: []const u8 = test_default_str,
+        str_slice: []const []const u8 = &test_default_str_slice,
+    };
+
+    const parsed = try parseFromSlice(T, testing.allocator, "{}", .{});
+    try testing.expectEqual(T{}, parsed);
+    // This will panic if it tries to free global constants:
+    parseFree(T, testing.allocator, parsed);
+}
+
+test "parse into struct where destination and source lengths mismatch" {
+    const T = struct { a: [2]u8 };
+    try testing.expectError(error.LengthMismatch, parseFromSlice(T, testing.allocator, "{\"a\": \"bbb\"}", .{}));
+}
+
+// End-to-end struct parsing: escaped/unicode field names, optionals,
+// defaults, fixed and dynamic arrays, nested structs, and a nested union.
+test "parse into struct with misc fields" {
+    const T = struct {
+        int: i64,
+        float: f64,
+        @"with\\escape": bool,
+        @"withąunicode😂": bool,
+        language: []const u8,
+        optional: ?bool,
+        default_field: i32 = 42,
+        static_array: [3]f64,
+        dynamic_array: []f64,
+
+        complex: struct {
+            nested: []const u8,
+        },
+
+        veryComplex: []struct {
+            foo: []const u8,
+        },
+
+        a_union: Union,
+        const Union = union(enum) {
+            x: u8,
+            float: f64,
+            string: []const u8,
+        };
+    };
+    var document_str =
+        \\{
+        \\ "int": 420,
+        \\ "float": 3.14,
+        \\ "with\\escape": true,
+        \\ "with\u0105unicode\ud83d\ude02": false,
+        \\ "language": "zig",
+        \\ "optional": null,
+        \\ "static_array": [66.6, 420.420, 69.69],
+        \\ "dynamic_array": [66.6, 420.420, 69.69],
+        \\ "complex": {
+        \\ "nested": "zig"
+        \\ },
+        \\ "veryComplex": [
+        \\ {
+        \\ "foo": "zig"
+        \\ }, {
+        \\ "foo": "rocks"
+        \\ }
+        \\ ],
+        \\ "a_union": {
+        \\ "float": 100000
+        \\ }
+        \\}
+    ;
+    const r = try parseFromSlice(T, testing.allocator, document_str, .{});
+    defer parseFree(T, testing.allocator, r);
+    try testing.expectEqual(@as(i64, 420), r.int);
+    try testing.expectEqual(@as(f64, 3.14), r.float);
+    try testing.expectEqual(true, r.@"with\\escape");
+    try testing.expectEqual(false, r.@"withąunicode😂");
+    try testing.expectEqualSlices(u8, "zig", r.language);
+    try testing.expectEqual(@as(?bool, null), r.optional);
+    // "default_field" is absent from the document; the default must apply.
+    try testing.expectEqual(@as(i32, 42), r.default_field);
+    try testing.expectEqual(@as(f64, 66.6), r.static_array[0]);
+    try testing.expectEqual(@as(f64, 420.420), r.static_array[1]);
+    try testing.expectEqual(@as(f64, 69.69), r.static_array[2]);
+    try testing.expectEqual(@as(usize, 3), r.dynamic_array.len);
+    try testing.expectEqual(@as(f64, 66.6), r.dynamic_array[0]);
+    try testing.expectEqual(@as(f64, 420.420), r.dynamic_array[1]);
+    try testing.expectEqual(@as(f64, 69.69), r.dynamic_array[2]);
+    try testing.expectEqualSlices(u8, r.complex.nested, "zig");
+    try testing.expectEqualSlices(u8, "zig", r.veryComplex[0].foo);
+    try testing.expectEqualSlices(u8, "rocks", r.veryComplex[1].foo);
+    try testing.expectEqual(T.Union{ .float = 100000 }, r.a_union);
+}
+
+// Sentinel-terminated slices must receive their sentinel; plain slices
+// must not grow one.
+test "parse into struct with strings and arrays with sentinels" {
+    const T = struct {
+        language: [:0]const u8,
+        language_without_sentinel: []const u8,
+        data: [:99]const i32,
+        simple_data: []const i32,
+    };
+    var document_str =
+        \\{
+        \\ "language": "zig",
+        \\ "language_without_sentinel": "zig again!",
+        \\ "data": [1, 2, 3],
+        \\ "simple_data": [4, 5, 6]
+        \\}
+    ;
+    const r = try parseFromSlice(T, testing.allocator, document_str, .{});
+    defer parseFree(T, testing.allocator, r);
+
+    try testing.expectEqualSentinel(u8, 0, "zig", r.language);
+
+    const data = [_:99]i32{ 1, 2, 3 };
+    try testing.expectEqualSentinel(i32, 99, data[0..data.len], r.data);
+
+    // Make sure that arrays who aren't supposed to have a sentinel still parse without one.
+    try testing.expectEqual(@as(?i32, null), std.meta.sentinel(@TypeOf(r.simple_data)));
+    try testing.expectEqual(@as(?u8, null), std.meta.sentinel(@TypeOf(r.language_without_sentinel)));
+}
+
+// duplicate_field_behavior controls which of two identical keys wins.
+test "parse into struct with duplicate field" {
+    // allow allocator to detect double frees by keeping bucket in use
+    const ballast = try testing.allocator.alloc(u64, 1);
+    defer testing.allocator.free(ballast);
+
+    const options_first = ParseOptions{ .duplicate_field_behavior = .use_first };
+    const options_last = ParseOptions{ .duplicate_field_behavior = .use_last };
+
+    const str = "{ \"a\": 1, \"a\": 0.25 }";
+
+    const T1 = struct { a: *u64 };
+    // both .use_first and .use_last should fail because second "a" value isn't a u64
+    try testing.expectError(error.InvalidNumber, parseFromSlice(T1, testing.allocator, str, options_first));
+    try testing.expectError(error.InvalidNumber, parseFromSlice(T1, testing.allocator, str, options_last));
+
+    const T2 = struct { a: f64 };
+    try testing.expectEqual(T2{ .a = 1.0 }, try parseFromSlice(T2, testing.allocator, str, options_first));
+    try testing.expectEqual(T2{ .a = 0.25 }, try parseFromSlice(T2, testing.allocator, str, options_last));
+}
+
+// With .ignore_unknown_fields = true, every key the target struct lacks
+// (including nested objects and arrays) is skipped instead of erroring.
+test "parse into struct ignoring unknown fields" {
+    const T = struct {
+        int: i64,
+        language: []const u8,
+    };
+
+    var str =
+        \\{
+        \\ "int": 420,
+        \\ "float": 3.14,
+        \\ "with\\escape": true,
+        \\ "with\u0105unicode\ud83d\ude02": false,
+        \\ "optional": null,
+        \\ "static_array": [66.6, 420.420, 69.69],
+        \\ "dynamic_array": [66.6, 420.420, 69.69],
+        \\ "complex": {
+        \\ "nested": "zig"
+        \\ },
+        \\ "veryComplex": [
+        \\ {
+        \\ "foo": "zig"
+        \\ }, {
+        \\ "foo": "rocks"
+        \\ }
+        \\ ],
+        \\ "a_union": {
+        \\ "float": 100000
+        \\ },
+        \\ "language": "zig"
+        \\}
+    ;
+    const r = try parseFromSlice(T, testing.allocator, str, .{ .ignore_unknown_fields = true });
+    defer parseFree(T, testing.allocator, r);
+
+    try testing.expectEqual(@as(i64, 420), r.int);
+    try testing.expectEqualSlices(u8, "zig", r.language);
+}
+
+// Tuples parse from JSON arrays, positionally, with heterogeneous
+// element types including nested structs, tuples, and unions.
+test "parse into tuple" {
+    const Union = union(enum) {
+        char: u8,
+        float: f64,
+        string: []const u8,
+    };
+    const T = std.meta.Tuple(&.{
+        i64,
+        f64,
+        bool,
+        []const u8,
+        ?bool,
+        struct {
+            foo: i32,
+            bar: []const u8,
+        },
+        std.meta.Tuple(&.{ u8, []const u8, u8 }),
+        Union,
+    });
+    var str =
+        \\[
+        \\ 420,
+        \\ 3.14,
+        \\ true,
+        \\ "zig",
+        \\ null,
+        \\ {
+        \\ "foo": 1,
+        \\ "bar": "zero"
+        \\ },
+        \\ [4, "två", 42],
+        \\ {"float": 12.34}
+        \\]
+    ;
+    const r = try parseFromSlice(T, testing.allocator, str, .{});
+    defer parseFree(T, testing.allocator, r);
+    try testing.expectEqual(@as(i64, 420), r[0]);
+    try testing.expectEqual(@as(f64, 3.14), r[1]);
+    try testing.expectEqual(true, r[2]);
+    try testing.expectEqualSlices(u8, "zig", r[3]);
+    try testing.expectEqual(@as(?bool, null), r[4]);
+    try testing.expectEqual(@as(i32, 1), r[5].foo);
+    try testing.expectEqualSlices(u8, "zero", r[5].bar);
+    try testing.expectEqual(@as(u8, 4), r[6][0]);
+    try testing.expectEqualSlices(u8, "två", r[6][1]);
+    try testing.expectEqual(@as(u8, 42), r[6][2]);
+    try testing.expectEqual(Union{ .float = 12.34 }, r[7]);
+}
+
+// Self-referential union: must be declared at file scope so the type can
+// name itself.
+const ParseIntoRecursiveUnionDefinitionValue = union(enum) {
+    integer: i64,
+    array: []const ParseIntoRecursiveUnionDefinitionValue,
+};
+
+test "parse into recursive union definition" {
+    const T = struct {
+        values: ParseIntoRecursiveUnionDefinitionValue,
+    };
+
+    const r = try parseFromSlice(T, testing.allocator, "{\"values\":{\"array\":[{\"integer\":58}]}}", .{});
+    defer parseFree(T, testing.allocator, r);
+
+    try testing.expectEqual(@as(i64, 58), r.values.array[0].integer);
+}
+
+// Mutually recursive pair of unions for the doubly-recursive case below.
+const ParseIntoDoubleRecursiveUnionValueFirst = union(enum) {
+    integer: i64,
+    array: []const ParseIntoDoubleRecursiveUnionValueSecond,
+};
+
+const ParseIntoDoubleRecursiveUnionValueSecond = union(enum) {
+    boolean: bool,
+    array: []const ParseIntoDoubleRecursiveUnionValueFirst,
+};
+
+test "parse into double recursive union definition" {
+    const T = struct {
+        values: ParseIntoDoubleRecursiveUnionValueFirst,
+    };
+
+    const r = try parseFromSlice(T, testing.allocator, "{\"values\":{\"array\":[{\"array\":[{\"integer\":58}]}]}}", .{});
+    defer parseFree(T, testing.allocator, r);
+
+    try testing.expectEqual(@as(i64, 58), r.values.array[0].array[0].integer);
+}
+
+// Exponential notation is accepted for integer targets only when the value
+// is a whole number that fits; fractional or overflowing values fail.
+test "parse exponential into int" {
+    const T = struct { int: i64 };
+    const r = try parseFromSlice(T, testing.allocator, "{ \"int\": 4.2e2 }", .{});
+    try testing.expectEqual(@as(i64, 420), r.int);
+    try testing.expectError(error.InvalidNumber, parseFromSlice(T, testing.allocator, "{ \"int\": 0.042e2 }", .{}));
+    try testing.expectError(error.Overflow, parseFromSlice(T, testing.allocator, "{ \"int\": 18446744073709551616.0 }", .{}));
+}
+
+// parseFromTokenSource accepts either a Scanner (complete input) or a
+// json reader wrapping any std.io reader.
+test "parseFromTokenSource" {
+    var scanner = JsonScanner.initCompleteInput(testing.allocator, "123");
+    defer scanner.deinit();
+    try testing.expectEqual(@as(u32, 123), try parseFromTokenSource(u32, testing.allocator, &scanner, .{}));
+
+    var stream = std.io.fixedBufferStream("123");
+    var json_reader = jsonReader(std.testing.allocator, stream.reader());
+    defer json_reader.deinit();
+    try testing.expectEqual(@as(u32, 123), try parseFromTokenSource(u32, testing.allocator, &json_reader, .{}));
+}
+
+test "max_value_len" {
+    try testing.expectError(error.ValueTooLong, parseFromSlice([]u8, testing.allocator, "\"0123456789\"", .{ .max_value_len = 5 }));
+}
+
+// @Vector targets parse from JSON arrays; field order in the document need
+// not match declaration order.
+test "parse into vector" {
+    const T = struct {
+        vec_i32: @Vector(4, i32),
+        vec_f32: @Vector(2, f32),
+    };
+    var s =
+        \\{
+        \\ "vec_f32": [1.5, 2.5],
+        \\ "vec_i32": [4, 5, 6, 7]
+        \\}
+    ;
+    const r = try parseFromSlice(T, testing.allocator, s, .{});
+    defer parseFree(T, testing.allocator, r);
+    try testing.expectApproxEqAbs(@as(f32, 1.5), r.vec_f32[0], 0.0000001);
+    try testing.expectApproxEqAbs(@as(f32, 2.5), r.vec_f32[1], 0.0000001);
+    try testing.expectEqual(@Vector(4, i32){ 4, 5, 6, 7 }, r.vec_i32);
+}
lib/std/json/stringify.zig
@@ -0,0 +1,313 @@
+const std = @import("std");
+const mem = std.mem;
+const assert = std.debug.assert;
+
+/// Options controlling the text produced by `stringify`:
+/// whitespace/indentation, null-optional emission, and string escaping.
+pub const StringifyOptions = struct {
+    /// Indentation and separator configuration for pretty-printing.
+    pub const Whitespace = struct {
+        /// How many indentation levels deep are we?
+        indent_level: usize = 0,
+
+        /// What character(s) should be used for indentation?
+        indent: union(enum) {
+            space: u8,
+            tab: void,
+            none: void,
+        } = .{ .space = 4 },
+
+        /// After a colon, should whitespace be inserted?
+        separator: bool = true,
+
+        /// Writes a newline plus `indent_level` repetitions of the configured
+        /// indent sequence; writes nothing at all when indent is `.none`.
+        pub fn outputIndent(
+            whitespace: @This(),
+            out_stream: anytype,
+        ) @TypeOf(out_stream).Error!void {
+            var char: u8 = undefined;
+            var n_chars: usize = undefined;
+            switch (whitespace.indent) {
+                .space => |n_spaces| {
+                    char = ' ';
+                    n_chars = n_spaces;
+                },
+                .tab => {
+                    char = '\t';
+                    n_chars = 1;
+                },
+                .none => return,
+            }
+            try out_stream.writeByte('\n');
+            n_chars *= whitespace.indent_level;
+            try out_stream.writeByteNTimes(char, n_chars);
+        }
+    };
+
+    /// Controls the whitespace emitted
+    whitespace: Whitespace = .{ .indent = .none, .separator = false },
+
+    /// Should optional fields with null value be written?
+    emit_null_optional_fields: bool = true,
+
+    // Default: emit []u8 as strings, with minimal escaping.
+    string: StringOptions = StringOptions{ .String = .{} },
+
+    /// Should []u8 be serialised as a string? or an array?
+    pub const StringOptions = union(enum) {
+        Array,
+        String: StringOutputOptions,
+
+        /// String output options
+        const StringOutputOptions = struct {
+            /// Should '/' be escaped in strings?
+            escape_solidus: bool = false,
+
+            /// Should unicode characters be escaped in strings?
+            escape_unicode: bool = false,
+        };
+    };
+};
+
+/// Writes `codepoint` to `out_stream` as a JSON `\uXXXX` escape, using a
+/// UTF-16 surrogate pair for codepoints beyond U+FFFF. Asserts the
+/// codepoint is a valid Unicode scalar value (<= U+10FFFF).
+fn outputUnicodeEscape(
+    codepoint: u21,
+    out_stream: anytype,
+) !void {
+    if (codepoint <= 0xFFFF) {
+        // If the character is in the Basic Multilingual Plane (U+0000 through U+FFFF),
+        // then it may be represented as a six-character sequence: a reverse solidus, followed
+        // by the lowercase letter u, followed by four hexadecimal digits that encode the character's code point.
+        try out_stream.writeAll("\\u");
+        try std.fmt.formatIntValue(codepoint, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
+    } else {
+        assert(codepoint <= 0x10FFFF);
+        // To escape an extended character that is not in the Basic Multilingual Plane,
+        // the character is represented as a 12-character sequence, encoding the UTF-16 surrogate pair.
+        const high = @intCast(u16, (codepoint - 0x10000) >> 10) + 0xD800;
+        const low = @intCast(u16, codepoint & 0x3FF) + 0xDC00;
+        try out_stream.writeAll("\\u");
+        try std.fmt.formatIntValue(high, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
+        try out_stream.writeAll("\\u");
+        try std.fmt.formatIntValue(low, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
+    }
+}
+
+/// Write `string` to `writer` as a JSON encoded string.
+/// Surrounds the content with double quotes; escaping behavior is
+/// controlled by `options.string` (see `encodeJsonStringChars`).
+pub fn encodeJsonString(string: []const u8, options: StringifyOptions, writer: anytype) !void {
+    try writer.writeByte('\"');
+    try encodeJsonStringChars(string, options, writer);
+    try writer.writeByte('\"');
+}
+
+/// Write `chars` to `writer` as JSON encoded string characters.
+/// No surrounding quotes are emitted. `chars` must be valid UTF-8: the
+/// UTF-8 decode errors below are treated as unreachable.
+pub fn encodeJsonStringChars(chars: []const u8, options: StringifyOptions, writer: anytype) !void {
+    var i: usize = 0;
+    while (i < chars.len) : (i += 1) {
+        switch (chars[i]) {
+            // normal ascii character
+            0x20...0x21, 0x23...0x2E, 0x30...0x5B, 0x5D...0x7F => |c| try writer.writeByte(c),
+            // only 2 characters that *must* be escaped
+            '\\' => try writer.writeAll("\\\\"),
+            '\"' => try writer.writeAll("\\\""),
+            // solidus is optional to escape
+            '/' => {
+                if (options.string.String.escape_solidus) {
+                    try writer.writeAll("\\/");
+                } else {
+                    try writer.writeByte('/');
+                }
+            },
+            // control characters with short escapes
+            // TODO: option to switch between unicode and 'short' forms?
+            0x8 => try writer.writeAll("\\b"),
+            0xC => try writer.writeAll("\\f"),
+            '\n' => try writer.writeAll("\\n"),
+            '\r' => try writer.writeAll("\\r"),
+            '\t' => try writer.writeAll("\\t"),
+            else => {
+                const ulen = std.unicode.utf8ByteSequenceLength(chars[i]) catch unreachable;
+                // control characters (only things left with 1 byte length) should always be printed as unicode escapes
+                if (ulen == 1 or options.string.String.escape_unicode) {
+                    const codepoint = std.unicode.utf8Decode(chars[i..][0..ulen]) catch unreachable;
+                    try outputUnicodeEscape(codepoint, writer);
+                } else {
+                    // Pass multi-byte UTF-8 sequences through unescaped.
+                    try writer.writeAll(chars[i..][0..ulen]);
+                }
+                // Skip the remaining bytes of the sequence we just consumed.
+                i += ulen - 1;
+            },
+        }
+    }
+}
+
+/// Serializes `value` as JSON text to `out_stream`.
+/// Handles bools, ints, floats, optionals, tagged unions, structs and
+/// tuples, error sets, pointers/slices (u8 slices become strings when
+/// `options.string == .String` and the bytes are valid UTF-8), arrays and
+/// vectors. Enums, unions, and structs may override serialization by
+/// declaring a `jsonStringify` method. Other types are compile errors.
+pub fn stringify(
+    value: anytype,
+    options: StringifyOptions,
+    out_stream: anytype,
+) !void {
+    const T = @TypeOf(value);
+    switch (@typeInfo(T)) {
+        .Float, .ComptimeFloat => {
+            return std.fmt.formatFloatScientific(value, std.fmt.FormatOptions{}, out_stream);
+        },
+        .Int, .ComptimeInt => {
+            return std.fmt.formatIntValue(value, "", std.fmt.FormatOptions{}, out_stream);
+        },
+        .Bool => {
+            return out_stream.writeAll(if (value) "true" else "false");
+        },
+        .Null => {
+            return out_stream.writeAll("null");
+        },
+        .Optional => {
+            if (value) |payload| {
+                return try stringify(payload, options, out_stream);
+            } else {
+                return try stringify(null, options, out_stream);
+            }
+        },
+        .Enum => {
+            // Enums are only serializable via a custom jsonStringify method.
+            if (comptime std.meta.trait.hasFn("jsonStringify")(T)) {
+                return value.jsonStringify(options, out_stream);
+            }
+
+            @compileError("Unable to stringify enum '" ++ @typeName(T) ++ "'");
+        },
+        .Union => {
+            if (comptime std.meta.trait.hasFn("jsonStringify")(T)) {
+                return value.jsonStringify(options, out_stream);
+            }
+
+            const info = @typeInfo(T).Union;
+            if (info.tag_type) |UnionTagType| {
+                // A tagged union is emitted as a single-field object:
+                // {"active_tag_name": payload}.
+                try out_stream.writeByte('{');
+                var child_options = options;
+                child_options.whitespace.indent_level += 1;
+                inline for (info.fields) |u_field| {
+                    if (value == @field(UnionTagType, u_field.name)) {
+                        try child_options.whitespace.outputIndent(out_stream);
+                        try encodeJsonString(u_field.name, options, out_stream);
+                        try out_stream.writeByte(':');
+                        if (child_options.whitespace.separator) {
+                            try out_stream.writeByte(' ');
+                        }
+                        if (u_field.type == void) {
+                            // Void payloads are emitted as an empty object.
+                            try out_stream.writeAll("{}");
+                        } else {
+                            try stringify(@field(value, u_field.name), child_options, out_stream);
+                        }
+                        break;
+                    }
+                } else {
+                    unreachable; // No active tag?
+                }
+                try options.whitespace.outputIndent(out_stream);
+                try out_stream.writeByte('}');
+                return;
+            } else {
+                @compileError("Unable to stringify untagged union '" ++ @typeName(T) ++ "'");
+            }
+        },
+        .Struct => |S| {
+            if (comptime std.meta.trait.hasFn("jsonStringify")(T)) {
+                return value.jsonStringify(options, out_stream);
+            }
+
+            // Tuples become JSON arrays; ordinary structs become objects.
+            try out_stream.writeByte(if (S.is_tuple) '[' else '{');
+            var field_output = false;
+            var child_options = options;
+            child_options.whitespace.indent_level += 1;
+            inline for (S.fields) |Field| {
+                // don't include void fields
+                if (Field.type == void) continue;
+
+                var emit_field = true;
+
+                // don't include optional fields that are null when emit_null_optional_fields is set to false
+                if (@typeInfo(Field.type) == .Optional) {
+                    if (options.emit_null_optional_fields == false) {
+                        if (@field(value, Field.name) == null) {
+                            emit_field = false;
+                        }
+                    }
+                }
+
+                if (emit_field) {
+                    // Comma-separate every field after the first emitted one.
+                    if (!field_output) {
+                        field_output = true;
+                    } else {
+                        try out_stream.writeByte(',');
+                    }
+                    try child_options.whitespace.outputIndent(out_stream);
+                    if (!S.is_tuple) {
+                        try encodeJsonString(Field.name, options, out_stream);
+                        try out_stream.writeByte(':');
+                        if (child_options.whitespace.separator) {
+                            try out_stream.writeByte(' ');
+                        }
+                    }
+                    try stringify(@field(value, Field.name), child_options, out_stream);
+                }
+            }
+            if (field_output) {
+                try options.whitespace.outputIndent(out_stream);
+            }
+            try out_stream.writeByte(if (S.is_tuple) ']' else '}');
+            return;
+        },
+        .ErrorSet => return stringify(@as([]const u8, @errorName(value)), options, out_stream),
+        .Pointer => |ptr_info| switch (ptr_info.size) {
+            .One => switch (@typeInfo(ptr_info.child)) {
+                .Array => {
+                    // Coerce *[N]T to []const T and serialize as a slice.
+                    const Slice = []const std.meta.Elem(ptr_info.child);
+                    return stringify(@as(Slice, value), options, out_stream);
+                },
+                else => {
+                    // TODO: avoid loops?
+                    return stringify(value.*, options, out_stream);
+                },
+            },
+            .Many, .Slice => {
+                if (ptr_info.size == .Many and ptr_info.sentinel == null)
+                    @compileError("unable to stringify type '" ++ @typeName(T) ++ "' without sentinel");
+                const slice = if (ptr_info.size == .Many) mem.span(value) else value;
+
+                if (ptr_info.child == u8 and options.string == .String and std.unicode.utf8ValidateSlice(slice)) {
+                    // Valid UTF-8 byte slices are emitted as JSON strings;
+                    // anything else falls through to array form below.
+                    try encodeJsonString(slice, options, out_stream);
+                    return;
+                }
+
+                try out_stream.writeByte('[');
+                var child_options = options;
+                child_options.whitespace.indent_level += 1;
+                for (slice, 0..) |x, i| {
+                    if (i != 0) {
+                        try out_stream.writeByte(',');
+                    }
+                    try child_options.whitespace.outputIndent(out_stream);
+                    try stringify(x, child_options, out_stream);
+                }
+                if (slice.len != 0) {
+                    try options.whitespace.outputIndent(out_stream);
+                }
+                try out_stream.writeByte(']');
+                return;
+            },
+            else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"),
+        },
+        .Array => return stringify(&value, options, out_stream),
+        .Vector => |info| {
+            // Copy the vector into an array and serialize that.
+            const array: [info.len]info.child = value;
+            return stringify(&array, options, out_stream);
+        },
+        else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"),
+    }
+    unreachable;
+}
+
+/// Same as `stringify` but accepts an Allocator and stores result in dynamically allocated memory instead of using a Writer.
+/// Caller owns returned memory.
+pub fn stringifyAlloc(allocator: std.mem.Allocator, value: anytype, options: StringifyOptions) ![]const u8 {
+    var list = std.ArrayList(u8).init(allocator);
+    errdefer list.deinit();
+    try stringify(value, options, list.writer());
+    return list.toOwnedSlice();
+}
+
+// Pull the companion test file into this compilation unit so its
+// tests run with `zig build test`.
+test {
+    _ = @import("./stringify_test.zig");
+}
lib/std/json/stringify_test.zig
@@ -0,0 +1,280 @@
+const std = @import("std");
+const mem = std.mem;
+const testing = std.testing;
+
+const StringifyOptions = @import("stringify.zig").StringifyOptions;
+const stringify = @import("stringify.zig").stringify;
+const stringifyAlloc = @import("stringify.zig").stringifyAlloc;
+
+test "stringify null optional fields" {
+ const MyStruct = struct {
+ optional: ?[]const u8 = null,
+ required: []const u8 = "something",
+ another_optional: ?[]const u8 = null,
+ another_required: []const u8 = "something else",
+ };
+ try teststringify(
+ \\{"optional":null,"required":"something","another_optional":null,"another_required":"something else"}
+ ,
+ MyStruct{},
+ StringifyOptions{},
+ );
+ try teststringify(
+ \\{"required":"something","another_required":"something else"}
+ ,
+ MyStruct{},
+ StringifyOptions{ .emit_null_optional_fields = false },
+ );
+}
+
+test "stringify basic types" {
+ try teststringify("false", false, StringifyOptions{});
+ try teststringify("true", true, StringifyOptions{});
+ try teststringify("null", @as(?u8, null), StringifyOptions{});
+ try teststringify("null", @as(?*u32, null), StringifyOptions{});
+ try teststringify("42", 42, StringifyOptions{});
+ try teststringify("4.2e+01", 42.0, StringifyOptions{});
+ try teststringify("42", @as(u8, 42), StringifyOptions{});
+ try teststringify("42", @as(u128, 42), StringifyOptions{});
+ try teststringify("4.2e+01", @as(f32, 42), StringifyOptions{});
+ try teststringify("4.2e+01", @as(f64, 42), StringifyOptions{});
+ try teststringify("\"ItBroke\"", @as(anyerror, error.ItBroke), StringifyOptions{});
+}
+
+test "stringify string" {
+ try teststringify("\"hello\"", "hello", StringifyOptions{});
+ try teststringify("\"with\\nescapes\\r\"", "with\nescapes\r", StringifyOptions{});
+ try teststringify("\"with\\nescapes\\r\"", "with\nescapes\r", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
+ try teststringify("\"with unicode\\u0001\"", "with unicode\u{1}", StringifyOptions{});
+ try teststringify("\"with unicode\\u0001\"", "with unicode\u{1}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
+ try teststringify("\"with unicode\u{80}\"", "with unicode\u{80}", StringifyOptions{});
+ try teststringify("\"with unicode\\u0080\"", "with unicode\u{80}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
+ try teststringify("\"with unicode\u{FF}\"", "with unicode\u{FF}", StringifyOptions{});
+ try teststringify("\"with unicode\\u00ff\"", "with unicode\u{FF}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
+ try teststringify("\"with unicode\u{100}\"", "with unicode\u{100}", StringifyOptions{});
+ try teststringify("\"with unicode\\u0100\"", "with unicode\u{100}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
+ try teststringify("\"with unicode\u{800}\"", "with unicode\u{800}", StringifyOptions{});
+ try teststringify("\"with unicode\\u0800\"", "with unicode\u{800}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
+ try teststringify("\"with unicode\u{8000}\"", "with unicode\u{8000}", StringifyOptions{});
+ try teststringify("\"with unicode\\u8000\"", "with unicode\u{8000}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
+ try teststringify("\"with unicode\u{D799}\"", "with unicode\u{D799}", StringifyOptions{});
+ try teststringify("\"with unicode\\ud799\"", "with unicode\u{D799}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
+ try teststringify("\"with unicode\u{10000}\"", "with unicode\u{10000}", StringifyOptions{});
+ try teststringify("\"with unicode\\ud800\\udc00\"", "with unicode\u{10000}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
+ try teststringify("\"with unicode\u{10FFFF}\"", "with unicode\u{10FFFF}", StringifyOptions{});
+ try teststringify("\"with unicode\\udbff\\udfff\"", "with unicode\u{10FFFF}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
+ try teststringify("\"/\"", "/", StringifyOptions{});
+ try teststringify("\"\\/\"", "/", StringifyOptions{ .string = .{ .String = .{ .escape_solidus = true } } });
+}
+
+test "stringify many-item sentinel-terminated string" {
+ try teststringify("\"hello\"", @as([*:0]const u8, "hello"), StringifyOptions{});
+ try teststringify("\"with\\nescapes\\r\"", @as([*:0]const u8, "with\nescapes\r"), StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
+ try teststringify("\"with unicode\\u0001\"", @as([*:0]const u8, "with unicode\u{1}"), StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
+}
+
+test "stringify tagged unions" {
+ const T = union(enum) {
+ nothing,
+ foo: u32,
+ bar: bool,
+ };
+ try teststringify("{\"nothing\":{}}", T{ .nothing = {} }, StringifyOptions{});
+ try teststringify("{\"foo\":42}", T{ .foo = 42 }, StringifyOptions{});
+ try teststringify("{\"bar\":true}", T{ .bar = true }, StringifyOptions{});
+}
+
+test "stringify struct" {
+ try teststringify("{\"foo\":42}", struct {
+ foo: u32,
+ }{ .foo = 42 }, StringifyOptions{});
+}
+
+test "stringify struct with string as array" {
+ try teststringify("{\"foo\":\"bar\"}", .{ .foo = "bar" }, StringifyOptions{});
+ try teststringify("{\"foo\":[98,97,114]}", .{ .foo = "bar" }, StringifyOptions{ .string = .Array });
+}
+
+test "stringify struct with indentation" {
+ try teststringify(
+ \\{
+ \\ "foo": 42,
+ \\ "bar": [
+ \\ 1,
+ \\ 2,
+ \\ 3
+ \\ ]
+ \\}
+ ,
+ struct {
+ foo: u32,
+ bar: [3]u32,
+ }{
+ .foo = 42,
+ .bar = .{ 1, 2, 3 },
+ },
+ StringifyOptions{
+ .whitespace = .{},
+ },
+ );
+ try teststringify(
+ "{\n\t\"foo\":42,\n\t\"bar\":[\n\t\t1,\n\t\t2,\n\t\t3\n\t]\n}",
+ struct {
+ foo: u32,
+ bar: [3]u32,
+ }{
+ .foo = 42,
+ .bar = .{ 1, 2, 3 },
+ },
+ StringifyOptions{
+ .whitespace = .{
+ .indent = .tab,
+ .separator = false,
+ },
+ },
+ );
+ try teststringify(
+ \\{"foo":42,"bar":[1,2,3]}
+ ,
+ struct {
+ foo: u32,
+ bar: [3]u32,
+ }{
+ .foo = 42,
+ .bar = .{ 1, 2, 3 },
+ },
+ StringifyOptions{
+ .whitespace = .{
+ .indent = .none,
+ .separator = false,
+ },
+ },
+ );
+}
+
+test "stringify struct with void field" {
+ try teststringify("{\"foo\":42}", struct {
+ foo: u32,
+ bar: void = {},
+ }{ .foo = 42 }, StringifyOptions{});
+}
+
+test "stringify array of structs" {
+ const MyStruct = struct {
+ foo: u32,
+ };
+ try teststringify("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", [_]MyStruct{
+ MyStruct{ .foo = 42 },
+ MyStruct{ .foo = 100 },
+ MyStruct{ .foo = 1000 },
+ }, StringifyOptions{});
+}
+
+test "stringify struct with custom stringifier" {
+ try teststringify("[\"something special\",42]", struct {
+ foo: u32,
+ const Self = @This();
+ pub fn jsonStringify(
+ value: Self,
+ options: StringifyOptions,
+ out_stream: anytype,
+ ) !void {
+ _ = value;
+ try out_stream.writeAll("[\"something special\",");
+ try stringify(42, options, out_stream);
+ try out_stream.writeByte(']');
+ }
+ }{ .foo = 42 }, StringifyOptions{});
+}
+
+test "stringify vector" {
+ try teststringify("[1,1]", @splat(2, @as(u32, 1)), StringifyOptions{});
+}
+
+test "stringify tuple" {
+ try teststringify("[\"foo\",42]", std.meta.Tuple(&.{ []const u8, usize }){ "foo", 42 }, StringifyOptions{});
+}
+
+fn teststringify(expected: []const u8, value: anytype, options: StringifyOptions) !void {
+ const ValidationWriter = struct {
+ const Self = @This();
+ pub const Writer = std.io.Writer(*Self, Error, write);
+ pub const Error = error{
+ TooMuchData,
+ DifferentData,
+ };
+
+ expected_remaining: []const u8,
+
+ fn init(exp: []const u8) Self {
+ return .{ .expected_remaining = exp };
+ }
+
+ pub fn writer(self: *Self) Writer {
+ return .{ .context = self };
+ }
+
+ fn write(self: *Self, bytes: []const u8) Error!usize {
+ if (self.expected_remaining.len < bytes.len) {
+ std.debug.print(
+ \\====== expected this output: =========
+ \\{s}
+ \\======== instead found this: =========
+ \\{s}
+ \\======================================
+ , .{
+ self.expected_remaining,
+ bytes,
+ });
+ return error.TooMuchData;
+ }
+ if (!mem.eql(u8, self.expected_remaining[0..bytes.len], bytes)) {
+ std.debug.print(
+ \\====== expected this output: =========
+ \\{s}
+ \\======== instead found this: =========
+ \\{s}
+ \\======================================
+ , .{
+ self.expected_remaining[0..bytes.len],
+ bytes,
+ });
+ return error.DifferentData;
+ }
+ self.expected_remaining = self.expected_remaining[bytes.len..];
+ return bytes.len;
+ }
+ };
+
+ var vos = ValidationWriter.init(expected);
+ try stringify(value, options, vos.writer());
+ if (vos.expected_remaining.len > 0) return error.NotEnoughData;
+}
+
+test "stringify struct with custom stringify that returns a custom error" {
+ var ret = stringify(struct {
+ field: Field = .{},
+
+ pub const Field = struct {
+ field: ?[]*Field = null,
+
+ const Self = @This();
+ pub fn jsonStringify(_: Self, _: StringifyOptions, _: anytype) error{CustomError}!void {
+ return error.CustomError;
+ }
+ };
+ }{}, StringifyOptions{}, std.io.null_writer);
+
+ try std.testing.expectError(error.CustomError, ret);
+}
+
+test "stringify alloc" {
+ const allocator = std.testing.allocator;
+ const expected =
+ \\{"foo":"bar","answer":42,"my_friend":"sammy"}
+ ;
+ const actual = try stringifyAlloc(allocator, .{ .foo = "bar", .answer = 42, .my_friend = "sammy" }, .{});
+ defer allocator.free(actual);
+
+ try std.testing.expectEqualStrings(expected, actual);
+}
lib/std/json/test.zig
@@ -1,2960 +1,113 @@
-// RFC 8529 conformance tests.
-//
-// Tests are taken from https://github.com/nst/JSONTestSuite
-// Read also http://seriot.ch/parsing_json.php for a good overview.
-
-const std = @import("../std.zig");
-const json = std.json;
+const std = @import("std");
const testing = std.testing;
-const TokenStream = std.json.TokenStream;
-const parse = std.json.parse;
-const ParseOptions = std.json.ParseOptions;
-const parseFree = std.json.parseFree;
-const Parser = std.json.Parser;
-const mem = std.mem;
-const writeStream = std.json.writeStream;
-const Value = std.json.Value;
-const StringifyOptions = std.json.StringifyOptions;
-const stringify = std.json.stringify;
-const stringifyAlloc = std.json.stringifyAlloc;
-const StreamingParser = std.json.StreamingParser;
-const Token = std.json.Token;
-const validate = std.json.validate;
-const Array = std.json.Array;
-const ObjectMap = std.json.ObjectMap;
-const assert = std.debug.assert;
-
-fn testNonStreaming(s: []const u8) !void {
- var p = json.Parser.init(testing.allocator, false);
- defer p.deinit();
-
- var tree = try p.parse(s);
- defer tree.deinit();
-}
-
-fn ok(s: []const u8) !void {
- try testing.expect(json.validate(s));
+const Parser = @import("./dynamic.zig").Parser;
+const validate = @import("./scanner.zig").validate;
+const JsonScanner = @import("./scanner.zig").Scanner;
- try testNonStreaming(s);
+// Support for JSONTestSuite.zig
+pub fn ok(s: []const u8) !void {
+ try testLowLevelScanner(s);
+ try testHighLevelDynamicParser(s);
}
-
-fn err(s: []const u8) !void {
- try testing.expect(!json.validate(s));
-
- try testing.expect(std.meta.isError(testNonStreaming(s)));
-}
-
-fn utf8Error(s: []const u8) !void {
- try testing.expect(!json.validate(s));
-
- try testing.expectError(error.InvalidUtf8Byte, testNonStreaming(s));
+pub fn err(s: []const u8) !void {
+ try testing.expect(std.meta.isError(testLowLevelScanner(s)));
+ try testing.expect(std.meta.isError(testHighLevelDynamicParser(s)));
}
-
-fn any(s: []const u8) !void {
- _ = json.validate(s);
-
- testNonStreaming(s) catch {};
+pub fn any(s: []const u8) !void {
+ testLowLevelScanner(s) catch {};
+ testHighLevelDynamicParser(s) catch {};
}
-
-fn anyStreamingErrNonStreaming(s: []const u8) !void {
- _ = json.validate(s);
-
- try testing.expect(std.meta.isError(testNonStreaming(s)));
+fn testLowLevelScanner(s: []const u8) !void {
+ var scanner = JsonScanner.initCompleteInput(testing.allocator, s);
+ defer scanner.deinit();
+ while (true) {
+ const token = try scanner.next();
+ if (token == .end_of_document) break;
+ }
}
-
-fn roundTrip(s: []const u8) !void {
- try testing.expect(json.validate(s));
-
- var p = json.Parser.init(testing.allocator, false);
+fn testHighLevelDynamicParser(s: []const u8) !void {
+ var p = Parser.init(testing.allocator, .alloc_if_needed);
defer p.deinit();
-
var tree = try p.parse(s);
defer tree.deinit();
-
- var buf: [256]u8 = undefined;
- var fbs = std.io.fixedBufferStream(&buf);
- try tree.root.jsonStringify(.{}, fbs.writer());
-
- try testing.expectEqualStrings(s, fbs.getWritten());
}
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
// Additional tests not part of test JSONTestSuite.
-
test "y_trailing_comma_after_empty" {
try roundTrip(
\\{"1":[],"2":{},"3":"4"}
);
}
-
test "n_object_closed_missing_value" {
try err(
\\{"a":}
);
}
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-test "y_array_arraysWithSpaces" {
- try ok(
- \\[[] ]
- );
-}
-
-test "y_array_empty" {
- try roundTrip(
- \\[]
- );
-}
-
-test "y_array_empty-string" {
- try roundTrip(
- \\[""]
- );
-}
-
-test "y_array_ending_with_newline" {
- try roundTrip(
- \\["a"]
- );
-}
-
-test "y_array_false" {
- try roundTrip(
- \\[false]
- );
-}
-
-test "y_array_heterogeneous" {
- try ok(
- \\[null, 1, "1", {}]
- );
-}
-
-test "y_array_null" {
- try roundTrip(
- \\[null]
- );
-}
-
-test "y_array_with_1_and_newline" {
- try ok(
- \\[1
- \\]
- );
-}
-
-test "y_array_with_leading_space" {
- try ok(
- \\ [1]
- );
-}
-
-test "y_array_with_several_null" {
- try roundTrip(
- \\[1,null,null,null,2]
- );
-}
-
-test "y_array_with_trailing_space" {
- try ok("[2] ");
-}
-
-test "y_number_0e+1" {
- try ok(
- \\[0e+1]
- );
-}
-
-test "y_number_0e1" {
- try ok(
- \\[0e1]
- );
-}
-
-test "y_number_after_space" {
- try ok(
- \\[ 4]
- );
-}
-
-test "y_number_double_close_to_zero" {
- try ok(
- \\[-0.000000000000000000000000000000000000000000000000000000000000000000000000000001]
- );
-}
-
-test "y_number_int_with_exp" {
- try ok(
- \\[20e1]
- );
-}
-
-test "y_number" {
- try ok(
- \\[123e65]
- );
-}
-
-test "y_number_minus_zero" {
- try ok(
- \\[-0]
- );
-}
-
-test "y_number_negative_int" {
- try roundTrip(
- \\[-123]
- );
-}
-
-test "y_number_negative_one" {
- try roundTrip(
- \\[-1]
- );
-}
-
-test "y_number_negative_zero" {
- try ok(
- \\[-0]
- );
-}
-
-test "y_number_real_capital_e" {
- try ok(
- \\[1E22]
- );
-}
-
-test "y_number_real_capital_e_neg_exp" {
- try ok(
- \\[1E-2]
- );
-}
-
-test "y_number_real_capital_e_pos_exp" {
- try ok(
- \\[1E+2]
- );
-}
-
-test "y_number_real_exponent" {
- try ok(
- \\[123e45]
- );
-}
-
-test "y_number_real_fraction_exponent" {
- try ok(
- \\[123.456e78]
- );
-}
-
-test "y_number_real_neg_exp" {
- try ok(
- \\[1e-2]
- );
-}
-
-test "y_number_real_pos_exponent" {
- try ok(
- \\[1e+2]
- );
-}
-
-test "y_number_simple_int" {
- try roundTrip(
- \\[123]
- );
-}
-
-test "y_number_simple_real" {
- try ok(
- \\[123.456789]
- );
-}
-
-test "y_object_basic" {
- try roundTrip(
- \\{"asd":"sdf"}
- );
-}
-
-test "y_object_duplicated_key_and_value" {
- try ok(
- \\{"a":"b","a":"b"}
- );
-}
-
-test "y_object_duplicated_key" {
- try ok(
- \\{"a":"b","a":"c"}
- );
-}
-
-test "y_object_empty" {
- try roundTrip(
- \\{}
- );
-}
-
-test "y_object_empty_key" {
- try roundTrip(
- \\{"":0}
- );
-}
-
-test "y_object_escaped_null_in_key" {
- try ok(
- \\{"foo\u0000bar": 42}
- );
-}
-
-test "y_object_extreme_numbers" {
- try ok(
- \\{ "min": -1.0e+28, "max": 1.0e+28 }
- );
-}
-
-test "y_object" {
- try ok(
- \\{"asd":"sdf", "dfg":"fgh"}
- );
-}
-
-test "y_object_long_strings" {
- try ok(
- \\{"x":[{"id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}], "id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
- );
-}
-
-test "y_object_simple" {
- try roundTrip(
- \\{"a":[]}
- );
-}
-
-test "y_object_string_unicode" {
- try ok(
- \\{"title":"\u041f\u043e\u043b\u0442\u043e\u0440\u0430 \u0417\u0435\u043c\u043b\u0435\u043a\u043e\u043f\u0430" }
- );
-}
-
-test "y_object_with_newlines" {
- try ok(
- \\{
- \\"a": "b"
- \\}
- );
-}
-
-test "y_string_1_2_3_bytes_UTF-8_sequences" {
- try ok(
- \\["\u0060\u012a\u12AB"]
- );
-}
-
-test "y_string_accepted_surrogate_pair" {
- try ok(
- \\["\uD801\udc37"]
- );
-}
-
-test "y_string_accepted_surrogate_pairs" {
- try ok(
- \\["\ud83d\ude39\ud83d\udc8d"]
- );
-}
-
-test "y_string_allowed_escapes" {
- try ok(
- \\["\"\\\/\b\f\n\r\t"]
- );
-}
-
-test "y_string_backslash_and_u_escaped_zero" {
- try ok(
- \\["\\u0000"]
- );
-}
-
-test "y_string_backslash_doublequotes" {
- try roundTrip(
- \\["\""]
- );
-}
-
-test "y_string_comments" {
- try ok(
- \\["a/*b*/c/*d//e"]
- );
-}
-
-test "y_string_double_escape_a" {
- try ok(
- \\["\\a"]
- );
-}
-
-test "y_string_double_escape_n" {
- try roundTrip(
- \\["\\n"]
- );
-}
-
-test "y_string_escaped_control_character" {
- try ok(
- \\["\u0012"]
- );
-}
-
-test "y_string_escaped_noncharacter" {
- try ok(
- \\["\uFFFF"]
- );
-}
-
-test "y_string_in_array" {
- try ok(
- \\["asd"]
- );
-}
-
-test "y_string_in_array_with_leading_space" {
- try ok(
- \\[ "asd"]
- );
-}
-
-test "y_string_last_surrogates_1_and_2" {
- try ok(
- \\["\uDBFF\uDFFF"]
- );
-}
-
-test "y_string_nbsp_uescaped" {
- try ok(
- \\["new\u00A0line"]
- );
-}
-
-test "y_string_nonCharacterInUTF-8_U+10FFFF" {
- try ok(
- \\[""]
- );
-}
-
-test "y_string_nonCharacterInUTF-8_U+FFFF" {
- try ok(
- \\[""]
- );
-}
-
-test "y_string_null_escape" {
- try ok(
- \\["\u0000"]
- );
-}
-
-test "y_string_one-byte-utf-8" {
- try ok(
- \\["\u002c"]
- );
-}
-
-test "y_string_pi" {
- try ok(
- \\["π"]
- );
-}
-
-test "y_string_reservedCharacterInUTF-8_U+1BFFF" {
- try ok(
- \\[""]
- );
-}
-
-test "y_string_simple_ascii" {
- try ok(
- \\["asd "]
- );
-}
+fn roundTrip(s: []const u8) !void {
+ try testing.expect(try validate(testing.allocator, s));
-test "y_string_space" {
- try roundTrip(
- \\" "
- );
-}
+ var p = Parser.init(testing.allocator, .alloc_if_needed);
+ defer p.deinit();
-test "y_string_surrogates_U+1D11E_MUSICAL_SYMBOL_G_CLEF" {
- try ok(
- \\["\uD834\uDd1e"]
- );
-}
+ var tree = try p.parse(s);
+ defer tree.deinit();
-test "y_string_three-byte-utf-8" {
- try ok(
- \\["\u0821"]
- );
-}
+ var buf: [256]u8 = undefined;
+ var fbs = std.io.fixedBufferStream(&buf);
+ try tree.root.jsonStringify(.{}, fbs.writer());
-test "y_string_two-byte-utf-8" {
- try ok(
- \\["\u0123"]
- );
+ try testing.expectEqualStrings(s, fbs.getWritten());
}
-test "y_string_u+2028_line_sep" {
- try ok("[\"\xe2\x80\xa8\"]");
+test "truncated UTF-8 sequence" {
+ try err("\"\xc2\"");
+ try err("\"\xdf\"");
+ try err("\"\xed\xa0\"");
+ try err("\"\xf0\x80\"");
+ try err("\"\xf0\x80\x80\"");
}
-test "y_string_u+2029_par_sep" {
- try ok("[\"\xe2\x80\xa9\"]");
+test "invalid continuation byte" {
+ try err("\"\xc2\x00\"");
+ try err("\"\xc2\x7f\"");
+ try err("\"\xc2\xc0\"");
+ try err("\"\xc3\xc1\"");
+ try err("\"\xc4\xf5\"");
+ try err("\"\xc5\xff\"");
+ try err("\"\xe4\x80\x00\"");
+ try err("\"\xe5\x80\x10\"");
+ try err("\"\xe6\x80\xc0\"");
+ try err("\"\xe7\x80\xf5\"");
+ try err("\"\xe8\x00\x80\"");
+ try err("\"\xf2\x00\x80\x80\"");
+ try err("\"\xf0\x80\x00\x80\"");
+ try err("\"\xf1\x80\xc0\x80\"");
+ try err("\"\xf2\x80\x80\x00\"");
+ try err("\"\xf3\x80\x80\xc0\"");
+ try err("\"\xf4\x80\x80\xf5\"");
}
-test "y_string_uescaped_newline" {
- try ok(
- \\["new\u000Aline"]
- );
-}
-
-test "y_string_uEscape" {
- try ok(
- \\["\u0061\u30af\u30EA\u30b9"]
- );
-}
-
-test "y_string_unescaped_char_delete" {
- try ok("[\"\x7f\"]");
-}
-
-test "y_string_unicode_2" {
- try ok(
- \\["⍂㈴⍂"]
- );
-}
-
-test "y_string_unicodeEscapedBackslash" {
- try ok(
- \\["\u005C"]
- );
-}
-
-test "y_string_unicode_escaped_double_quote" {
- try ok(
- \\["\u0022"]
- );
-}
-
-test "y_string_unicode" {
- try ok(
- \\["\uA66D"]
- );
-}
-
-test "y_string_unicode_U+10FFFE_nonchar" {
- try ok(
- \\["\uDBFF\uDFFE"]
- );
-}
-
-test "y_string_unicode_U+1FFFE_nonchar" {
- try ok(
- \\["\uD83F\uDFFE"]
- );
-}
-
-test "y_string_unicode_U+200B_ZERO_WIDTH_SPACE" {
- try ok(
- \\["\u200B"]
- );
-}
-
-test "y_string_unicode_U+2064_invisible_plus" {
- try ok(
- \\["\u2064"]
- );
-}
-
-test "y_string_unicode_U+FDD0_nonchar" {
- try ok(
- \\["\uFDD0"]
- );
-}
-
-test "y_string_unicode_U+FFFE_nonchar" {
- try ok(
- \\["\uFFFE"]
- );
-}
-
-test "y_string_utf8" {
- try ok(
- \\["€𝄞"]
- );
-}
-
-test "y_string_with_del_character" {
- try ok("[\"a\x7fa\"]");
-}
-
-test "y_structure_lonely_false" {
- try roundTrip(
- \\false
- );
-}
-
-test "y_structure_lonely_int" {
- try roundTrip(
- \\42
- );
-}
-
-test "y_structure_lonely_negative_real" {
- try ok(
- \\-0.1
- );
-}
-
-test "y_structure_lonely_null" {
- try roundTrip(
- \\null
- );
-}
-
-test "y_structure_lonely_string" {
- try roundTrip(
- \\"asd"
- );
-}
-
-test "y_structure_lonely_true" {
- try roundTrip(
- \\true
- );
-}
-
-test "y_structure_string_empty" {
- try roundTrip(
- \\""
- );
-}
-
-test "y_structure_trailing_newline" {
- try roundTrip(
- \\["a"]
- );
-}
-
-test "y_structure_true_in_array" {
- try roundTrip(
- \\[true]
- );
-}
-
-test "y_structure_whitespace_array" {
- try ok(" [] ");
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-test "n_array_1_true_without_comma" {
- try err(
- \\[1 true]
- );
-}
-
-test "n_array_a_invalid_utf8" {
- try err(
- \\[aå]
- );
-}
-
-test "n_array_colon_instead_of_comma" {
- try err(
- \\["": 1]
- );
-}
-
-test "n_array_comma_after_close" {
- try err(
- \\[""],
- );
-}
-
-test "n_array_comma_and_number" {
- try err(
- \\[,1]
- );
-}
-
-test "n_array_double_comma" {
- try err(
- \\[1,,2]
- );
-}
-
-test "n_array_double_extra_comma" {
- try err(
- \\["x",,]
- );
-}
-
-test "n_array_extra_close" {
- try err(
- \\["x"]]
- );
-}
-
-test "n_array_extra_comma" {
- try err(
- \\["",]
- );
-}
-
-test "n_array_incomplete_invalid_value" {
- try err(
- \\[x
- );
-}
-
-test "n_array_incomplete" {
- try err(
- \\["x"
- );
-}
-
-test "n_array_inner_array_no_comma" {
- try err(
- \\[3[4]]
- );
-}
-
-test "n_array_invalid_utf8" {
- try err(
- \\[ÿ]
- );
-}
-
-test "n_array_items_separated_by_semicolon" {
- try err(
- \\[1:2]
- );
-}
-
-test "n_array_just_comma" {
- try err(
- \\[,]
- );
-}
-
-test "n_array_just_minus" {
- try err(
- \\[-]
- );
-}
-
-test "n_array_missing_value" {
- try err(
- \\[ , ""]
- );
-}
-
-test "n_array_newlines_unclosed" {
- try err(
- \\["a",
- \\4
- \\,1,
- );
-}
-
-test "n_array_number_and_comma" {
- try err(
- \\[1,]
- );
-}
-
-test "n_array_number_and_several_commas" {
- try err(
- \\[1,,]
- );
-}
-
-test "n_array_spaces_vertical_tab_formfeed" {
- try err("[\"\x0aa\"\\f]");
-}
-
-test "n_array_star_inside" {
- try err(
- \\[*]
- );
-}
-
-test "n_array_unclosed" {
- try err(
- \\[""
- );
-}
-
-test "n_array_unclosed_trailing_comma" {
- try err(
- \\[1,
- );
-}
-
-test "n_array_unclosed_with_new_lines" {
- try err(
- \\[1,
- \\1
- \\,1
- );
-}
-
-test "n_array_unclosed_with_object_inside" {
- try err(
- \\[{}
- );
-}
-
-test "n_incomplete_false" {
- try err(
- \\[fals]
- );
-}
-
-test "n_incomplete_null" {
- try err(
- \\[nul]
- );
-}
-
-test "n_incomplete_true" {
- try err(
- \\[tru]
- );
-}
-
-test "n_multidigit_number_then_00" {
- try err("123\x00");
-}
-
-test "n_number_0.1.2" {
- try err(
- \\[0.1.2]
- );
-}
-
-test "n_number_-01" {
- try err(
- \\[-01]
- );
-}
-
-test "n_number_0.3e" {
- try err(
- \\[0.3e]
- );
-}
-
-test "n_number_0.3e+" {
- try err(
- \\[0.3e+]
- );
-}
-
-test "n_number_0_capital_E" {
- try err(
- \\[0E]
- );
-}
-
-test "n_number_0_capital_E+" {
- try err(
- \\[0E+]
- );
-}
-
-test "n_number_0.e1" {
- try err(
- \\[0.e1]
- );
-}
-
-test "n_number_0e" {
- try err(
- \\[0e]
- );
-}
-
-test "n_number_0e+" {
- try err(
- \\[0e+]
- );
-}
-
-test "n_number_1_000" {
- try err(
- \\[1 000.0]
- );
-}
-
-test "n_number_1.0e-" {
- try err(
- \\[1.0e-]
- );
-}
-
-test "n_number_1.0e" {
- try err(
- \\[1.0e]
- );
-}
-
-test "n_number_1.0e+" {
- try err(
- \\[1.0e+]
- );
-}
-
-test "n_number_-1.0." {
- try err(
- \\[-1.0.]
- );
-}
-
-test "n_number_1eE2" {
- try err(
- \\[1eE2]
- );
-}
-
-test "n_number_.-1" {
- try err(
- \\[.-1]
- );
-}
-
-test "n_number_+1" {
- try err(
- \\[+1]
- );
-}
-
-test "n_number_.2e-3" {
- try err(
- \\[.2e-3]
- );
-}
-
-test "n_number_2.e-3" {
- try err(
- \\[2.e-3]
- );
-}
-
-test "n_number_2.e+3" {
- try err(
- \\[2.e+3]
- );
-}
-
-test "n_number_2.e3" {
- try err(
- \\[2.e3]
- );
-}
-
-test "n_number_-2." {
- try err(
- \\[-2.]
- );
-}
-
-test "n_number_9.e+" {
- try err(
- \\[9.e+]
- );
-}
-
-test "n_number_expression" {
- try err(
- \\[1+2]
- );
-}
-
-test "n_number_hex_1_digit" {
- try err(
- \\[0x1]
- );
-}
-
-test "n_number_hex_2_digits" {
- try err(
- \\[0x42]
- );
-}
-
-test "n_number_infinity" {
- try err(
- \\[Infinity]
- );
-}
-
-test "n_number_+Inf" {
- try err(
- \\[+Inf]
- );
-}
-
-test "n_number_Inf" {
- try err(
- \\[Inf]
- );
-}
-
-test "n_number_invalid+-" {
- try err(
- \\[0e+-1]
- );
-}
-
-test "n_number_invalid-negative-real" {
- try err(
- \\[-123.123foo]
- );
-}
-
-test "n_number_invalid-utf-8-in-bigger-int" {
- try err(
- \\[123å]
- );
-}
-
-test "n_number_invalid-utf-8-in-exponent" {
- try err(
- \\[1e1å]
- );
-}
-
-test "n_number_invalid-utf-8-in-int" {
- try err(
- \\[0å]
- );
-}
-
-test "n_number_++" {
- try err(
- \\[++1234]
- );
-}
-
-test "n_number_minus_infinity" {
- try err(
- \\[-Infinity]
- );
-}
-
-test "n_number_minus_sign_with_trailing_garbage" {
- try err(
- \\[-foo]
- );
-}
-
-test "n_number_minus_space_1" {
- try err(
- \\[- 1]
- );
-}
-
-test "n_number_-NaN" {
- try err(
- \\[-NaN]
- );
-}
-
-test "n_number_NaN" {
- try err(
- \\[NaN]
- );
-}
-
-test "n_number_neg_int_starting_with_zero" {
- try err(
- \\[-012]
- );
-}
-
-test "n_number_neg_real_without_int_part" {
- try err(
- \\[-.123]
- );
-}
-
-test "n_number_neg_with_garbage_at_end" {
- try err(
- \\[-1x]
- );
-}
-
-test "n_number_real_garbage_after_e" {
- try err(
- \\[1ea]
- );
-}
-
-test "n_number_real_with_invalid_utf8_after_e" {
- try err(
- \\[1eå]
- );
-}
-
-test "n_number_real_without_fractional_part" {
- try err(
- \\[1.]
- );
-}
-
-test "n_number_starting_with_dot" {
- try err(
- \\[.123]
- );
-}
-
-test "n_number_U+FF11_fullwidth_digit_one" {
- try err(
- \\[ï¼]
- );
-}
-
-test "n_number_with_alpha_char" {
- try err(
- \\[1.8011670033376514H-308]
- );
-}
-
-test "n_number_with_alpha" {
- try err(
- \\[1.2a-3]
- );
-}
-
-test "n_number_with_leading_zero" {
- try err(
- \\[012]
- );
-}
-
-test "n_object_bad_value" {
- try err(
- \\["x", truth]
- );
-}
-
-test "n_object_bracket_key" {
- try err(
- \\{[: "x"}
- );
-}
-
-test "n_object_comma_instead_of_colon" {
- try err(
- \\{"x", null}
- );
-}
-
-test "n_object_double_colon" {
- try err(
- \\{"x"::"b"}
- );
-}
-
-test "n_object_emoji" {
- try err(
- \\{ð¨ð}
- );
-}
-
-test "n_object_garbage_at_end" {
- try err(
- \\{"a":"a" 123}
- );
-}
-
-test "n_object_key_with_single_quotes" {
- try err(
- \\{key: 'value'}
- );
-}
-
-test "n_object_lone_continuation_byte_in_key_and_trailing_comma" {
- try err(
- \\{"¹":"0",}
- );
-}
-
-test "n_object_missing_colon" {
- try err(
- \\{"a" b}
- );
-}
-
-test "n_object_missing_key" {
- try err(
- \\{:"b"}
- );
-}
-
-test "n_object_missing_semicolon" {
- try err(
- \\{"a" "b"}
- );
-}
-
-test "n_object_missing_value" {
- try err(
- \\{"a":
- );
-}
-
-test "n_object_no-colon" {
- try err(
- \\{"a"
- );
-}
-
-test "n_object_non_string_key_but_huge_number_instead" {
- try err(
- \\{9999E9999:1}
- );
-}
-
-test "n_object_non_string_key" {
- try err(
- \\{1:1}
- );
-}
-
-test "n_object_repeated_null_null" {
- try err(
- \\{null:null,null:null}
- );
-}
-
-test "n_object_several_trailing_commas" {
- try err(
- \\{"id":0,,,,,}
- );
-}
-
-test "n_object_single_quote" {
- try err(
- \\{'a':0}
- );
-}
-
-test "n_object_trailing_comma" {
- try err(
- \\{"id":0,}
- );
-}
-
-test "n_object_trailing_comment" {
- try err(
- \\{"a":"b"}/**/
- );
-}
-
-test "n_object_trailing_comment_open" {
- try err(
- \\{"a":"b"}/**//
- );
-}
-
-test "n_object_trailing_comment_slash_open_incomplete" {
- try err(
- \\{"a":"b"}/
- );
-}
-
-test "n_object_trailing_comment_slash_open" {
- try err(
- \\{"a":"b"}//
- );
-}
-
-test "n_object_two_commas_in_a_row" {
- try err(
- \\{"a":"b",,"c":"d"}
- );
-}
-
-test "n_object_unquoted_key" {
- try err(
- \\{a: "b"}
- );
-}
-
-test "n_object_unterminated-value" {
- try err(
- \\{"a":"a
- );
-}
-
-test "n_object_with_single_string" {
- try err(
- \\{ "foo" : "bar", "a" }
- );
-}
-
-test "n_object_with_trailing_garbage" {
- try err(
- \\{"a":"b"}#
- );
-}
-
-test "n_single_space" {
- try err(" ");
-}
-
-test "n_string_1_surrogate_then_escape" {
- try err(
- \\["\uD800\"]
- );
-}
-
-test "n_string_1_surrogate_then_escape_u1" {
- try err(
- \\["\uD800\u1"]
- );
-}
-
-test "n_string_1_surrogate_then_escape_u1x" {
- try err(
- \\["\uD800\u1x"]
- );
-}
-
-test "n_string_1_surrogate_then_escape_u" {
- try err(
- \\["\uD800\u"]
- );
-}
-
-test "n_string_accentuated_char_no_quotes" {
- try err(
- \\[é]
- );
-}
-
-test "n_string_backslash_00" {
- try err("[\"\x00\"]");
-}
-
-test "n_string_escaped_backslash_bad" {
- try err(
- \\["\\\"]
- );
-}
-
-test "n_string_escaped_ctrl_char_tab" {
- try err("\x5b\x22\x5c\x09\x22\x5d");
-}
-
-test "n_string_escaped_emoji" {
- try err("[\"\x5c\xc3\xb0\xc2\x9f\xc2\x8c\xc2\x80\"]");
-}
-
-test "n_string_escape_x" {
- try err(
- \\["\x00"]
- );
-}
-
-test "n_string_incomplete_escaped_character" {
- try err(
- \\["\u00A"]
- );
-}
-
-test "n_string_incomplete_escape" {
- try err(
- \\["\"]
- );
-}
-
-test "n_string_incomplete_surrogate_escape_invalid" {
- try err(
- \\["\uD800\uD800\x"]
- );
-}
-
-test "n_string_incomplete_surrogate" {
- try err(
- \\["\uD834\uDd"]
- );
-}
-
-test "n_string_invalid_backslash_esc" {
- try err(
- \\["\a"]
- );
-}
-
-test "n_string_invalid_unicode_escape" {
- try err(
- \\["\uqqqq"]
- );
-}
-
-test "n_string_invalid_utf8_after_escape" {
- try err("[\"\\\x75\xc3\xa5\"]");
-}
-
-test "n_string_invalid-utf-8-in-escape" {
- try err(
- \\["\uå"]
- );
-}
-
-test "n_string_leading_uescaped_thinspace" {
- try err(
- \\[\u0020"asd"]
- );
-}
-
-test "n_string_no_quotes_with_bad_escape" {
- try err(
- \\[\n]
- );
-}
-
-test "n_string_single_doublequote" {
- try err(
- \\"
- );
-}
-
-test "n_string_single_quote" {
- try err(
- \\['single quote']
- );
-}
-
-test "n_string_single_string_no_double_quotes" {
- try err(
- \\abc
- );
-}
-
-test "n_string_start_escape_unclosed" {
- try err(
- \\["\
- );
-}
-
-test "n_string_unescaped_crtl_char" {
- try err("[\"a\x00a\"]");
-}
-
-test "n_string_unescaped_newline" {
- try err(
- \\["new
- \\line"]
- );
-}
-
-test "n_string_unescaped_tab" {
- try err("[\"\t\"]");
-}
-
-test "n_string_unicode_CapitalU" {
- try err(
- \\"\UA66D"
- );
-}
-
-test "n_string_with_trailing_garbage" {
- try err(
- \\""x
- );
-}
-
-test "n_structure_100000_opening_arrays" {
- try err("[" ** 100000);
-}
-
-test "n_structure_angle_bracket_." {
- try err(
- \\<.>
- );
-}
-
-test "n_structure_angle_bracket_null" {
- try err(
- \\[<null>]
- );
-}
-
-test "n_structure_array_trailing_garbage" {
- try err(
- \\[1]x
- );
-}
-
-test "n_structure_array_with_extra_array_close" {
- try err(
- \\[1]]
- );
-}
-
-test "n_structure_array_with_unclosed_string" {
- try err(
- \\["asd]
- );
-}
-
-test "n_structure_ascii-unicode-identifier" {
- try err(
- \\aå
- );
-}
-
-test "n_structure_capitalized_True" {
- try err(
- \\[True]
- );
-}
-
-test "n_structure_close_unopened_array" {
- try err(
- \\1]
- );
-}
-
-test "n_structure_comma_instead_of_closing_brace" {
- try err(
- \\{"x": true,
- );
-}
-
-test "n_structure_double_array" {
- try err(
- \\[][]
- );
-}
-
-test "n_structure_end_array" {
- try err(
- \\]
- );
-}
-
-test "n_structure_incomplete_UTF8_BOM" {
- try err(
- \\ï»{}
- );
-}
-
-test "n_structure_lone-invalid-utf-8" {
- try err(
- \\å
- );
-}
-
-test "n_structure_lone-open-bracket" {
- try err(
- \\[
- );
-}
-
-test "n_structure_no_data" {
- try err(
- \\
- );
-}
-
-test "n_structure_null-byte-outside-string" {
- try err("[\x00]");
-}
-
-test "n_structure_number_with_trailing_garbage" {
- try err(
- \\2@
- );
-}
-
-test "n_structure_object_followed_by_closing_object" {
- try err(
- \\{}}
- );
-}
-
-test "n_structure_object_unclosed_no_value" {
- try err(
- \\{"":
- );
-}
-
-test "n_structure_object_with_comment" {
- try err(
- \\{"a":/*comment*/"b"}
- );
-}
-
-test "n_structure_object_with_trailing_garbage" {
- try err(
- \\{"a": true} "x"
- );
-}
-
-test "n_structure_open_array_apostrophe" {
- try err(
- \\['
- );
-}
-
-test "n_structure_open_array_comma" {
- try err(
- \\[,
- );
-}
-
-test "n_structure_open_array_object" {
- try err("[{\"\":" ** 50000);
-}
-
-test "n_structure_open_array_open_object" {
- try err(
- \\[{
- );
-}
-
-test "n_structure_open_array_open_string" {
- try err(
- \\["a
- );
-}
-
-test "n_structure_open_array_string" {
- try err(
- \\["a"
- );
-}
-
-test "n_structure_open_object_close_array" {
- try err(
- \\{]
- );
-}
-
-test "n_structure_open_object_comma" {
- try err(
- \\{,
- );
-}
-
-test "n_structure_open_object" {
- try err(
- \\{
- );
-}
-
-test "n_structure_open_object_open_array" {
- try err(
- \\{[
- );
-}
-
-test "n_structure_open_object_open_string" {
- try err(
- \\{"a
- );
-}
-
-test "n_structure_open_object_string_with_apostrophes" {
- try err(
- \\{'a'
- );
-}
-
-test "n_structure_open_open" {
- try err(
- \\["\{["\{["\{["\{
- );
-}
-
-test "n_structure_single_eacute" {
- try err(
- \\é
- );
-}
-
-test "n_structure_single_star" {
- try err(
- \\*
- );
-}
-
-test "n_structure_trailing_#" {
- try err(
- \\{"a":"b"}#{}
- );
-}
-
-test "n_structure_U+2060_word_joined" {
- try err(
- \\[â ]
- );
-}
-
-test "n_structure_uescaped_LF_before_string" {
- try err(
- \\[\u000A""]
- );
-}
-
-test "n_structure_unclosed_array" {
- try err(
- \\[1
- );
-}
-
-test "n_structure_unclosed_array_partial_null" {
- try err(
- \\[ false, nul
- );
-}
-
-test "n_structure_unclosed_array_unfinished_false" {
- try err(
- \\[ true, fals
- );
-}
-
-test "n_structure_unclosed_array_unfinished_true" {
- try err(
- \\[ false, tru
- );
-}
-
-test "n_structure_unclosed_object" {
- try err(
- \\{"asd":"asd"
- );
-}
-
-test "n_structure_unicode-identifier" {
- try err(
- \\Ã¥
- );
-}
-
-test "n_structure_UTF8_BOM_no_data" {
- try err(
- \\
- );
-}
-
-test "n_structure_whitespace_formfeed" {
- try err("[\x0c]");
-}
-
-test "n_structure_whitespace_U+2060_word_joiner" {
- try err(
- \\[â ]
- );
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-test "i_number_double_huge_neg_exp" {
- try any(
- \\[123.456e-789]
- );
-}
-
-test "i_number_huge_exp" {
- try any(
- \\[0.4e00669999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999969999999006]
- );
-}
-
-test "i_number_neg_int_huge_exp" {
- try any(
- \\[-1e+9999]
- );
-}
-
-test "i_number_pos_double_huge_exp" {
- try any(
- \\[1.5e+9999]
- );
-}
-
-test "i_number_real_neg_overflow" {
- try any(
- \\[-123123e100000]
- );
-}
-
-test "i_number_real_pos_overflow" {
- try any(
- \\[123123e100000]
- );
-}
-
-test "i_number_real_underflow" {
- try any(
- \\[123e-10000000]
- );
-}
-
-test "i_number_too_big_neg_int" {
- try any(
- \\[-123123123123123123123123123123]
- );
-}
-
-test "i_number_too_big_pos_int" {
- try any(
- \\[100000000000000000000]
- );
-}
-
-test "i_number_very_big_negative_int" {
- try any(
- \\[-237462374673276894279832749832423479823246327846]
- );
-}
-
-test "i_object_key_lone_2nd_surrogate" {
- try anyStreamingErrNonStreaming(
- \\{"\uDFAA":0}
- );
-}
-
-test "i_string_1st_surrogate_but_2nd_missing" {
- try anyStreamingErrNonStreaming(
- \\["\uDADA"]
- );
-}
-
-test "i_string_1st_valid_surrogate_2nd_invalid" {
- try anyStreamingErrNonStreaming(
- \\["\uD888\u1234"]
- );
-}
-
-test "i_string_incomplete_surrogate_and_escape_valid" {
- try anyStreamingErrNonStreaming(
- \\["\uD800\n"]
- );
-}
-
-test "i_string_incomplete_surrogate_pair" {
- try anyStreamingErrNonStreaming(
- \\["\uDd1ea"]
- );
-}
-
-test "i_string_incomplete_surrogates_escape_valid" {
- try anyStreamingErrNonStreaming(
- \\["\uD800\uD800\n"]
- );
-}
-
-test "i_string_invalid_lonely_surrogate" {
- try anyStreamingErrNonStreaming(
- \\["\ud800"]
- );
-}
-
-test "i_string_invalid_surrogate" {
- try anyStreamingErrNonStreaming(
- \\["\ud800abc"]
- );
-}
-
-test "i_string_invalid_utf-8" {
- try any(
- \\["ÿ"]
- );
-}
-
-test "i_string_inverted_surrogates_U+1D11E" {
- try anyStreamingErrNonStreaming(
- \\["\uDd1e\uD834"]
- );
-}
-
-test "i_string_iso_latin_1" {
- try any(
- \\["é"]
- );
-}
-
-test "i_string_lone_second_surrogate" {
- try anyStreamingErrNonStreaming(
- \\["\uDFAA"]
- );
-}
-
-test "i_string_lone_utf8_continuation_byte" {
- try any(
- \\[""]
- );
-}
-
-test "i_string_not_in_unicode_range" {
- try any(
- \\["ô¿¿¿"]
- );
-}
-
-test "i_string_overlong_sequence_2_bytes" {
- try any(
- \\["À¯"]
- );
-}
-
-test "i_string_overlong_sequence_6_bytes" {
- try any(
- \\["ü¿¿¿¿"]
- );
-}
-
-test "i_string_overlong_sequence_6_bytes_null" {
- try any(
- \\["ü"]
- );
-}
-
-test "i_string_truncated-utf-8" {
- try any(
- \\["àÿ"]
- );
-}
-
-test "i_string_utf16BE_no_BOM" {
- try any("\x00\x5b\x00\x22\x00\xc3\xa9\x00\x22\x00\x5d");
-}
-
-test "i_string_utf16LE_no_BOM" {
- try any("\x5b\x00\x22\x00\xc3\xa9\x00\x22\x00\x5d\x00");
-}
-
-test "i_string_UTF-16LE_with_BOM" {
- try any("\xc3\xbf\xc3\xbe\x5b\x00\x22\x00\xc3\xa9\x00\x22\x00\x5d\x00");
-}
-
-test "i_string_UTF-8_invalid_sequence" {
- try any(
- \\["æ¥Ñú"]
- );
-}
-
-test "i_string_UTF8_surrogate_U+D800" {
- try any(
- \\["í "]
- );
-}
-
-test "i_structure_500_nested_arrays" {
- try any(("[" ** 500) ++ ("]" ** 500));
-}
-
-test "i_structure_UTF-8_BOM_empty_object" {
- try any(
- \\{}
- );
-}
-
-test "truncated UTF-8 sequence" {
- try utf8Error("\"\xc2\"");
- try utf8Error("\"\xdf\"");
- try utf8Error("\"\xed\xa0\"");
- try utf8Error("\"\xf0\x80\"");
- try utf8Error("\"\xf0\x80\x80\"");
-}
-
-test "invalid continuation byte" {
- try utf8Error("\"\xc2\x00\"");
- try utf8Error("\"\xc2\x7f\"");
- try utf8Error("\"\xc2\xc0\"");
- try utf8Error("\"\xc3\xc1\"");
- try utf8Error("\"\xc4\xf5\"");
- try utf8Error("\"\xc5\xff\"");
- try utf8Error("\"\xe4\x80\x00\"");
- try utf8Error("\"\xe5\x80\x10\"");
- try utf8Error("\"\xe6\x80\xc0\"");
- try utf8Error("\"\xe7\x80\xf5\"");
- try utf8Error("\"\xe8\x00\x80\"");
- try utf8Error("\"\xf2\x00\x80\x80\"");
- try utf8Error("\"\xf0\x80\x00\x80\"");
- try utf8Error("\"\xf1\x80\xc0\x80\"");
- try utf8Error("\"\xf2\x80\x80\x00\"");
- try utf8Error("\"\xf3\x80\x80\xc0\"");
- try utf8Error("\"\xf4\x80\x80\xf5\"");
-}
-
-test "disallowed overlong form" {
- try utf8Error("\"\xc0\x80\"");
- try utf8Error("\"\xc0\x90\"");
- try utf8Error("\"\xc1\x80\"");
- try utf8Error("\"\xc1\x90\"");
- try utf8Error("\"\xe0\x80\x80\"");
- try utf8Error("\"\xf0\x80\x80\x80\"");
+test "disallowed overlong form" {
+ try err("\"\xc0\x80\"");
+ try err("\"\xc0\x90\"");
+ try err("\"\xc1\x80\"");
+ try err("\"\xc1\x90\"");
+ try err("\"\xe0\x80\x80\"");
+ try err("\"\xf0\x80\x80\x80\"");
}
test "out of UTF-16 range" {
- try utf8Error("\"\xf4\x90\x80\x80\"");
- try utf8Error("\"\xf5\x80\x80\x80\"");
- try utf8Error("\"\xf6\x80\x80\x80\"");
- try utf8Error("\"\xf7\x80\x80\x80\"");
- try utf8Error("\"\xf8\x80\x80\x80\"");
- try utf8Error("\"\xf9\x80\x80\x80\"");
- try utf8Error("\"\xfa\x80\x80\x80\"");
- try utf8Error("\"\xfb\x80\x80\x80\"");
- try utf8Error("\"\xfc\x80\x80\x80\"");
- try utf8Error("\"\xfd\x80\x80\x80\"");
- try utf8Error("\"\xfe\x80\x80\x80\"");
- try utf8Error("\"\xff\x80\x80\x80\"");
-}
-
-test "parse" {
- var ts = TokenStream.init("false");
- try testing.expectEqual(false, try parse(bool, &ts, ParseOptions{}));
- ts = TokenStream.init("true");
- try testing.expectEqual(true, try parse(bool, &ts, ParseOptions{}));
- ts = TokenStream.init("1");
- try testing.expectEqual(@as(u1, 1), try parse(u1, &ts, ParseOptions{}));
- ts = TokenStream.init("50");
- try testing.expectError(error.Overflow, parse(u1, &ts, ParseOptions{}));
- ts = TokenStream.init("42");
- try testing.expectEqual(@as(u64, 42), try parse(u64, &ts, ParseOptions{}));
- ts = TokenStream.init("42.0");
- try testing.expectEqual(@as(f64, 42), try parse(f64, &ts, ParseOptions{}));
- ts = TokenStream.init("null");
- try testing.expectEqual(@as(?bool, null), try parse(?bool, &ts, ParseOptions{}));
- ts = TokenStream.init("true");
- try testing.expectEqual(@as(?bool, true), try parse(?bool, &ts, ParseOptions{}));
-
- ts = TokenStream.init("\"foo\"");
- try testing.expectEqual(@as([3]u8, "foo".*), try parse([3]u8, &ts, ParseOptions{}));
- ts = TokenStream.init("[102, 111, 111]");
- try testing.expectEqual(@as([3]u8, "foo".*), try parse([3]u8, &ts, ParseOptions{}));
- ts = TokenStream.init("[]");
- try testing.expectEqual(@as([0]u8, undefined), try parse([0]u8, &ts, ParseOptions{}));
-
- ts = TokenStream.init("\"12345678901234567890\"");
- try testing.expectEqual(@as(u64, 12345678901234567890), try parse(u64, &ts, ParseOptions{}));
- ts = TokenStream.init("\"123.456\"");
- try testing.expectEqual(@as(f64, 123.456), try parse(f64, &ts, ParseOptions{}));
-}
-
-test "parse into enum" {
- const T = enum(u32) {
- Foo = 42,
- Bar,
- @"with\\escape",
- };
- var ts = TokenStream.init("\"Foo\"");
- try testing.expectEqual(@as(T, .Foo), try parse(T, &ts, ParseOptions{}));
- ts = TokenStream.init("42");
- try testing.expectEqual(@as(T, .Foo), try parse(T, &ts, ParseOptions{}));
- ts = TokenStream.init("\"with\\\\escape\"");
- try testing.expectEqual(@as(T, .@"with\\escape"), try parse(T, &ts, ParseOptions{}));
- ts = TokenStream.init("5");
- try testing.expectError(error.InvalidEnumTag, parse(T, &ts, ParseOptions{}));
- ts = TokenStream.init("\"Qux\"");
- try testing.expectError(error.InvalidEnumTag, parse(T, &ts, ParseOptions{}));
-}
-
-test "parse with trailing data" {
- var ts = TokenStream.init("falsed");
- try testing.expectEqual(false, try parse(bool, &ts, ParseOptions{ .allow_trailing_data = true }));
- ts = TokenStream.init("falsed");
- try testing.expectError(error.InvalidTopLevelTrailing, parse(bool, &ts, ParseOptions{ .allow_trailing_data = false }));
- // trailing whitespace is okay
- ts = TokenStream.init("false \n");
- try testing.expectEqual(false, try parse(bool, &ts, ParseOptions{ .allow_trailing_data = false }));
-}
-
-test "parse into that allocates a slice" {
- var ts = TokenStream.init("\"foo\"");
- try testing.expectError(error.AllocatorRequired, parse([]u8, &ts, ParseOptions{}));
-
- const options = ParseOptions{ .allocator = testing.allocator };
- {
- ts = TokenStream.init("\"foo\"");
- const r = try parse([]u8, &ts, options);
- defer parseFree([]u8, r, options);
- try testing.expectEqualSlices(u8, "foo", r);
- }
- {
- ts = TokenStream.init("[102, 111, 111]");
- const r = try parse([]u8, &ts, options);
- defer parseFree([]u8, r, options);
- try testing.expectEqualSlices(u8, "foo", r);
- }
- {
- ts = TokenStream.init("\"with\\\\escape\"");
- const r = try parse([]u8, &ts, options);
- defer parseFree([]u8, r, options);
- try testing.expectEqualSlices(u8, "with\\escape", r);
- }
-}
-
-test "parse into tagged union" {
- {
- const T = union(enum) {
- int: i32,
- float: f64,
- string: []const u8,
- };
- var ts = TokenStream.init("1.5");
- try testing.expectEqual(T{ .float = 1.5 }, try parse(T, &ts, ParseOptions{}));
- }
-
- { // failing allocations should be bubbled up instantly without trying next member
- var fail_alloc = testing.FailingAllocator.init(testing.allocator, 0);
- const options = ParseOptions{ .allocator = fail_alloc.allocator() };
- const T = union(enum) {
- // both fields here match the input
- string: []const u8,
- array: [3]u8,
- };
- var ts = TokenStream.init("[1,2,3]");
- try testing.expectError(error.OutOfMemory, parse(T, &ts, options));
- }
-
- {
- // if multiple matches possible, takes first option
- const T = union(enum) {
- x: u8,
- y: u8,
- };
- var ts = TokenStream.init("42");
- try testing.expectEqual(T{ .x = 42 }, try parse(T, &ts, ParseOptions{}));
- }
-
- { // needs to back out when first union member doesn't match
- const T = union(enum) {
- A: struct { x: u32 },
- B: struct { y: u32 },
- };
- var ts = TokenStream.init("{\"y\":42}");
- try testing.expectEqual(T{ .B = .{ .y = 42 } }, try parse(T, &ts, ParseOptions{}));
- }
-}
-
-test "parse union bubbles up AllocatorRequired" {
- { // string member first in union (and not matching)
- const T = union(enum) {
- string: []const u8,
- int: i32,
- };
- var ts = TokenStream.init("42");
- try testing.expectError(error.AllocatorRequired, parse(T, &ts, ParseOptions{}));
- }
-
- { // string member not first in union (and matching)
- const T = union(enum) {
- int: i32,
- float: f64,
- string: []const u8,
- };
- var ts = TokenStream.init("\"foo\"");
- try testing.expectError(error.AllocatorRequired, parse(T, &ts, ParseOptions{}));
- }
-}
-
-test "parseFree descends into tagged union" {
- var fail_alloc = testing.FailingAllocator.init(testing.allocator, 1);
- const options = ParseOptions{ .allocator = fail_alloc.allocator() };
- const T = union(enum) {
- int: i32,
- float: f64,
- string: []const u8,
- };
- // use a string with unicode escape so we know result can't be a reference to global constant
- var ts = TokenStream.init("\"with\\u0105unicode\"");
- const r = try parse(T, &ts, options);
- try testing.expectEqual(std.meta.Tag(T).string, @as(std.meta.Tag(T), r));
- try testing.expectEqualSlices(u8, "withąunicode", r.string);
- try testing.expectEqual(@as(usize, 0), fail_alloc.deallocations);
- parseFree(T, r, options);
- try testing.expectEqual(@as(usize, 1), fail_alloc.deallocations);
-}
-
-test "parse with comptime field" {
- {
- const T = struct {
- comptime a: i32 = 0,
- b: bool,
- };
- var ts = TokenStream.init(
- \\{
- \\ "a": 0,
- \\ "b": true
- \\}
- );
- try testing.expectEqual(T{ .a = 0, .b = true }, try parse(T, &ts, ParseOptions{}));
- }
-
- { // string comptime values currently require an allocator
- const T = union(enum) {
- foo: struct {
- comptime kind: []const u8 = "boolean",
- b: bool,
- },
- bar: struct {
- comptime kind: []const u8 = "float",
- b: f64,
- },
- };
-
- const options = ParseOptions{
- .allocator = std.testing.allocator,
- };
-
- var ts = TokenStream.init(
- \\{
- \\ "kind": "float",
- \\ "b": 1.0
- \\}
- );
- const r = try parse(T, &ts, options);
-
- // check that parseFree doesn't try to free comptime fields
- parseFree(T, r, options);
- }
-}
-
-test "parse into struct with no fields" {
- const T = struct {};
- var ts = TokenStream.init("{}");
- try testing.expectEqual(T{}, try parse(T, &ts, ParseOptions{}));
-}
-
-const test_const_value: usize = 123;
-
-test "parse into struct with default const pointer field" {
- const T = struct { a: *const usize = &test_const_value };
- var ts = TokenStream.init("{}");
- try testing.expectEqual(T{}, try parse(T, &ts, .{}));
-}
-
-const test_default_usize: usize = 123;
-const test_default_usize_ptr: *align(1) const usize = &test_default_usize;
-const test_default_str: []const u8 = "test str";
-const test_default_str_slice: [2][]const u8 = [_][]const u8{
- "test1",
- "test2",
-};
-
-test "freeing parsed structs with pointers to default values" {
- const T = struct {
- int: *const usize = &test_default_usize,
- int_ptr: *allowzero align(1) const usize = test_default_usize_ptr,
- str: []const u8 = test_default_str,
- str_slice: []const []const u8 = &test_default_str_slice,
- };
-
- var ts = json.TokenStream.init("{}");
- const options = .{ .allocator = std.heap.page_allocator };
- const parsed = try json.parse(T, &ts, options);
-
- try testing.expectEqual(T{}, parsed);
-
- json.parseFree(T, parsed, options);
-}
-
-test "parse into struct where destination and source lengths mismatch" {
- const T = struct { a: [2]u8 };
- var ts = TokenStream.init("{\"a\": \"bbb\"}");
- try testing.expectError(error.LengthMismatch, parse(T, &ts, ParseOptions{}));
-}
-
-test "parse into struct with misc fields" {
- @setEvalBranchQuota(10000);
- const options = ParseOptions{ .allocator = testing.allocator };
- const T = struct {
- int: i64,
- float: f64,
- @"with\\escape": bool,
- @"withąunicode😂": bool,
- language: []const u8,
- optional: ?bool,
- default_field: i32 = 42,
- static_array: [3]f64,
- dynamic_array: []f64,
-
- complex: struct {
- nested: []const u8,
- },
-
- veryComplex: []struct {
- foo: []const u8,
- },
-
- a_union: Union,
- const Union = union(enum) {
- x: u8,
- float: f64,
- string: []const u8,
- };
- };
- var ts = TokenStream.init(
- \\{
- \\ "int": 420,
- \\ "float": 3.14,
- \\ "with\\escape": true,
- \\ "with\u0105unicode\ud83d\ude02": false,
- \\ "language": "zig",
- \\ "optional": null,
- \\ "static_array": [66.6, 420.420, 69.69],
- \\ "dynamic_array": [66.6, 420.420, 69.69],
- \\ "complex": {
- \\ "nested": "zig"
- \\ },
- \\ "veryComplex": [
- \\ {
- \\ "foo": "zig"
- \\ }, {
- \\ "foo": "rocks"
- \\ }
- \\ ],
- \\ "a_union": 100000
- \\}
- );
- const r = try parse(T, &ts, options);
- defer parseFree(T, r, options);
- try testing.expectEqual(@as(i64, 420), r.int);
- try testing.expectEqual(@as(f64, 3.14), r.float);
- try testing.expectEqual(true, r.@"with\\escape");
- try testing.expectEqual(false, r.@"withąunicode😂");
- try testing.expectEqualSlices(u8, "zig", r.language);
- try testing.expectEqual(@as(?bool, null), r.optional);
- try testing.expectEqual(@as(i32, 42), r.default_field);
- try testing.expectEqual(@as(f64, 66.6), r.static_array[0]);
- try testing.expectEqual(@as(f64, 420.420), r.static_array[1]);
- try testing.expectEqual(@as(f64, 69.69), r.static_array[2]);
- try testing.expectEqual(@as(usize, 3), r.dynamic_array.len);
- try testing.expectEqual(@as(f64, 66.6), r.dynamic_array[0]);
- try testing.expectEqual(@as(f64, 420.420), r.dynamic_array[1]);
- try testing.expectEqual(@as(f64, 69.69), r.dynamic_array[2]);
- try testing.expectEqualSlices(u8, r.complex.nested, "zig");
- try testing.expectEqualSlices(u8, "zig", r.veryComplex[0].foo);
- try testing.expectEqualSlices(u8, "rocks", r.veryComplex[1].foo);
- try testing.expectEqual(T.Union{ .float = 100000 }, r.a_union);
-}
-
-test "parse into struct with strings and arrays with sentinels" {
- @setEvalBranchQuota(10000);
- const options = ParseOptions{ .allocator = testing.allocator };
- const T = struct {
- language: [:0]const u8,
- language_without_sentinel: []const u8,
- data: [:99]const i32,
- simple_data: []const i32,
- };
- var ts = TokenStream.init(
- \\{
- \\ "language": "zig",
- \\ "language_without_sentinel": "zig again!",
- \\ "data": [1, 2, 3],
- \\ "simple_data": [4, 5, 6]
- \\}
- );
- const r = try parse(T, &ts, options);
- defer parseFree(T, r, options);
-
- try testing.expectEqualSentinel(u8, 0, "zig", r.language);
-
- const data = [_:99]i32{ 1, 2, 3 };
- try testing.expectEqualSentinel(i32, 99, data[0..data.len], r.data);
-
- // Make sure that arrays who aren't supposed to have a sentinel still parse without one.
- try testing.expectEqual(@as(?i32, null), std.meta.sentinel(@TypeOf(r.simple_data)));
- try testing.expectEqual(@as(?u8, null), std.meta.sentinel(@TypeOf(r.language_without_sentinel)));
-}
-
-test "parse into struct with duplicate field" {
- // allow allocator to detect double frees by keeping bucket in use
- const ballast = try testing.allocator.alloc(u64, 1);
- defer testing.allocator.free(ballast);
-
- const options_first = ParseOptions{ .allocator = testing.allocator, .duplicate_field_behavior = .UseFirst };
-
- const options_last = ParseOptions{
- .allocator = testing.allocator,
- .duplicate_field_behavior = .UseLast,
- };
-
- const str = "{ \"a\": 1, \"a\": 0.25 }";
-
- const T1 = struct { a: *u64 };
- // both .UseFirst and .UseLast should fail because second "a" value isn't a u64
- var ts = TokenStream.init(str);
- try testing.expectError(error.InvalidNumber, parse(T1, &ts, options_first));
- ts = TokenStream.init(str);
- try testing.expectError(error.InvalidNumber, parse(T1, &ts, options_last));
-
- const T2 = struct { a: f64 };
- ts = TokenStream.init(str);
- try testing.expectEqual(T2{ .a = 1.0 }, try parse(T2, &ts, options_first));
- ts = TokenStream.init(str);
- try testing.expectEqual(T2{ .a = 0.25 }, try parse(T2, &ts, options_last));
-
- const T3 = struct { comptime a: f64 = 1.0 };
- // .UseFirst should succeed because second "a" value is unconditionally ignored (even though != 1.0)
- const t3 = T3{ .a = 1.0 };
- ts = TokenStream.init(str);
- try testing.expectEqual(t3, try parse(T3, &ts, options_first));
- // .UseLast should fail because second "a" value is 0.25 which is not equal to default value of 1.0
- ts = TokenStream.init(str);
- try testing.expectError(error.UnexpectedValue, parse(T3, &ts, options_last));
-}
-
-test "parse into struct ignoring unknown fields" {
- const T = struct {
- int: i64,
- language: []const u8,
- };
-
- const ops = ParseOptions{
- .allocator = testing.allocator,
- .ignore_unknown_fields = true,
- };
-
- var ts = TokenStream.init(
- \\{
- \\ "int": 420,
- \\ "float": 3.14,
- \\ "with\\escape": true,
- \\ "with\u0105unicode\ud83d\ude02": false,
- \\ "optional": null,
- \\ "static_array": [66.6, 420.420, 69.69],
- \\ "dynamic_array": [66.6, 420.420, 69.69],
- \\ "complex": {
- \\ "nested": "zig"
- \\ },
- \\ "veryComplex": [
- \\ {
- \\ "foo": "zig"
- \\ }, {
- \\ "foo": "rocks"
- \\ }
- \\ ],
- \\ "a_union": 100000,
- \\ "language": "zig"
- \\}
- );
- const r = try parse(T, &ts, ops);
- defer parseFree(T, r, ops);
-
- try testing.expectEqual(@as(i64, 420), r.int);
- try testing.expectEqualSlices(u8, "zig", r.language);
-}
-
-test "parse into tuple" {
- const options = ParseOptions{ .allocator = testing.allocator };
- const Union = union(enum) {
- char: u8,
- float: f64,
- string: []const u8,
- };
- const T = std.meta.Tuple(&.{
- i64,
- f64,
- bool,
- []const u8,
- ?bool,
- struct {
- foo: i32,
- bar: []const u8,
- },
- std.meta.Tuple(&.{ u8, []const u8, u8 }),
- Union,
- });
- var ts = TokenStream.init(
- \\[
- \\ 420,
- \\ 3.14,
- \\ true,
- \\ "zig",
- \\ null,
- \\ {
- \\ "foo": 1,
- \\ "bar": "zero"
- \\ },
- \\ [4, "två", 42],
- \\ 12.34
- \\]
- );
- const r = try parse(T, &ts, options);
- defer parseFree(T, r, options);
- try testing.expectEqual(@as(i64, 420), r[0]);
- try testing.expectEqual(@as(f64, 3.14), r[1]);
- try testing.expectEqual(true, r[2]);
- try testing.expectEqualSlices(u8, "zig", r[3]);
- try testing.expectEqual(@as(?bool, null), r[4]);
- try testing.expectEqual(@as(i32, 1), r[5].foo);
- try testing.expectEqualSlices(u8, "zero", r[5].bar);
- try testing.expectEqual(@as(u8, 4), r[6][0]);
- try testing.expectEqualSlices(u8, "två", r[6][1]);
- try testing.expectEqual(@as(u8, 42), r[6][2]);
- try testing.expectEqual(Union{ .float = 12.34 }, r[7]);
-}
-
-const ParseIntoRecursiveUnionDefinitionValue = union(enum) {
- integer: i64,
- array: []const ParseIntoRecursiveUnionDefinitionValue,
-};
-
-test "parse into recursive union definition" {
- const T = struct {
- values: ParseIntoRecursiveUnionDefinitionValue,
- };
- const ops = ParseOptions{ .allocator = testing.allocator };
-
- var ts = TokenStream.init("{\"values\":[58]}");
- const r = try parse(T, &ts, ops);
- defer parseFree(T, r, ops);
-
- try testing.expectEqual(@as(i64, 58), r.values.array[0].integer);
-}
-
-const ParseIntoDoubleRecursiveUnionValueFirst = union(enum) {
- integer: i64,
- array: []const ParseIntoDoubleRecursiveUnionValueSecond,
-};
-
-const ParseIntoDoubleRecursiveUnionValueSecond = union(enum) {
- boolean: bool,
- array: []const ParseIntoDoubleRecursiveUnionValueFirst,
-};
-
-test "parse into double recursive union definition" {
- const T = struct {
- values: ParseIntoDoubleRecursiveUnionValueFirst,
- };
- const ops = ParseOptions{ .allocator = testing.allocator };
-
- var ts = TokenStream.init("{\"values\":[[58]]}");
- const r = try parse(T, &ts, ops);
- defer parseFree(T, r, ops);
-
- try testing.expectEqual(@as(i64, 58), r.values.array[0].array[0].integer);
-}
-
-test "parse into vector" {
- const options = ParseOptions{ .allocator = testing.allocator };
- const T = struct {
- vec_i32: @Vector(4, i32),
- vec_f32: @Vector(2, f32),
- };
- var ts = TokenStream.init(
- \\{
- \\ "vec_f32": [1.5, 2.5],
- \\ "vec_i32": [4, 5, 6, 7]
- \\}
- );
- const r = try parse(T, &ts, options);
- defer parseFree(T, r, options);
- try testing.expectApproxEqAbs(@as(f32, 1.5), r.vec_f32[0], 0.0000001);
- try testing.expectApproxEqAbs(@as(f32, 2.5), r.vec_f32[1], 0.0000001);
- try testing.expectEqual(@Vector(4, i32){ 4, 5, 6, 7 }, r.vec_i32);
-}
-
-test "json.parser.dynamic" {
- var p = Parser.init(testing.allocator, false);
- defer p.deinit();
-
- const s =
- \\{
- \\ "Image": {
- \\ "Width": 800,
- \\ "Height": 600,
- \\ "Title": "View from 15th Floor",
- \\ "Thumbnail": {
- \\ "Url": "http://www.example.com/image/481989943",
- \\ "Height": 125,
- \\ "Width": 100
- \\ },
- \\ "Animated" : false,
- \\ "IDs": [116, 943, 234, 38793],
- \\ "ArrayOfObject": [{"n": "m"}],
- \\ "double": 1.3412,
- \\ "LargeInt": 18446744073709551615
- \\ }
- \\}
- ;
-
- var tree = try p.parse(s);
- defer tree.deinit();
-
- var root = tree.root;
-
- var image = root.Object.get("Image").?;
-
- const width = image.Object.get("Width").?;
- try testing.expect(width.Integer == 800);
-
- const height = image.Object.get("Height").?;
- try testing.expect(height.Integer == 600);
-
- const title = image.Object.get("Title").?;
- try testing.expect(mem.eql(u8, title.String, "View from 15th Floor"));
-
- const animated = image.Object.get("Animated").?;
- try testing.expect(animated.Bool == false);
-
- const array_of_object = image.Object.get("ArrayOfObject").?;
- try testing.expect(array_of_object.Array.items.len == 1);
-
- const obj0 = array_of_object.Array.items[0].Object.get("n").?;
- try testing.expect(mem.eql(u8, obj0.String, "m"));
-
- const double = image.Object.get("double").?;
- try testing.expect(double.Float == 1.3412);
-
- const large_int = image.Object.get("LargeInt").?;
- try testing.expect(mem.eql(u8, large_int.NumberString, "18446744073709551615"));
-}
-
-test "write json then parse it" {
- var out_buffer: [1000]u8 = undefined;
-
- var fixed_buffer_stream = std.io.fixedBufferStream(&out_buffer);
- const out_stream = fixed_buffer_stream.writer();
- var jw = writeStream(out_stream, 4);
-
- try jw.beginObject();
-
- try jw.objectField("f");
- try jw.emitBool(false);
-
- try jw.objectField("t");
- try jw.emitBool(true);
-
- try jw.objectField("int");
- try jw.emitNumber(1234);
-
- try jw.objectField("array");
- try jw.beginArray();
-
- try jw.arrayElem();
- try jw.emitNull();
-
- try jw.arrayElem();
- try jw.emitNumber(12.34);
-
- try jw.endArray();
-
- try jw.objectField("str");
- try jw.emitString("hello");
-
- try jw.endObject();
-
- var parser = Parser.init(testing.allocator, false);
- defer parser.deinit();
- var tree = try parser.parse(fixed_buffer_stream.getWritten());
- defer tree.deinit();
-
- try testing.expect(tree.root.Object.get("f").?.Bool == false);
- try testing.expect(tree.root.Object.get("t").?.Bool == true);
- try testing.expect(tree.root.Object.get("int").?.Integer == 1234);
- try testing.expect(tree.root.Object.get("array").?.Array.items[0].Null == {});
- try testing.expect(tree.root.Object.get("array").?.Array.items[1].Float == 12.34);
- try testing.expect(mem.eql(u8, tree.root.Object.get("str").?.String, "hello"));
-}
-
-fn testParse(arena_allocator: std.mem.Allocator, json_str: []const u8) !Value {
- var p = Parser.init(arena_allocator, false);
- return (try p.parse(json_str)).root;
-}
-
-test "parsing empty string gives appropriate error" {
- var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
- defer arena_allocator.deinit();
- try testing.expectError(error.UnexpectedEndOfJson, testParse(arena_allocator.allocator(), ""));
-}
-
-test "parse tree should not contain dangling pointers" {
- var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
- defer arena_allocator.deinit();
-
- var p = json.Parser.init(arena_allocator.allocator(), false);
- defer p.deinit();
-
- var tree = try p.parse("[]");
- defer tree.deinit();
-
- // Allocation should succeed
- var i: usize = 0;
- while (i < 100) : (i += 1) {
- try tree.root.Array.append(std.json.Value{ .Integer = 100 });
- }
- try testing.expectEqual(tree.root.Array.items.len, 100);
-}
-
-test "integer after float has proper type" {
- var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
- defer arena_allocator.deinit();
- const parsed = try testParse(arena_allocator.allocator(),
- \\{
- \\ "float": 3.14,
- \\ "ints": [1, 2, 3]
- \\}
- );
- try std.testing.expect(parsed.Object.get("ints").?.Array.items[0] == .Integer);
-}
-
-test "parse exponential into int" {
- const T = struct { int: i64 };
- var ts = TokenStream.init("{ \"int\": 4.2e2 }");
- const r = try parse(T, &ts, ParseOptions{});
- try testing.expectEqual(@as(i64, 420), r.int);
- ts = TokenStream.init("{ \"int\": 0.042e2 }");
- try testing.expectError(error.InvalidNumber, parse(T, &ts, ParseOptions{}));
- ts = TokenStream.init("{ \"int\": 18446744073709551616.0 }");
- try testing.expectError(error.Overflow, parse(T, &ts, ParseOptions{}));
-}
-
-test "escaped characters" {
- var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
- defer arena_allocator.deinit();
- const input =
- \\{
- \\ "backslash": "\\",
- \\ "forwardslash": "\/",
- \\ "newline": "\n",
- \\ "carriagereturn": "\r",
- \\ "tab": "\t",
- \\ "formfeed": "\f",
- \\ "backspace": "\b",
- \\ "doublequote": "\"",
- \\ "unicode": "\u0105",
- \\ "surrogatepair": "\ud83d\ude02"
- \\}
- ;
-
- const obj = (try testParse(arena_allocator.allocator(), input)).Object;
-
- try testing.expectEqualSlices(u8, obj.get("backslash").?.String, "\\");
- try testing.expectEqualSlices(u8, obj.get("forwardslash").?.String, "/");
- try testing.expectEqualSlices(u8, obj.get("newline").?.String, "\n");
- try testing.expectEqualSlices(u8, obj.get("carriagereturn").?.String, "\r");
- try testing.expectEqualSlices(u8, obj.get("tab").?.String, "\t");
- try testing.expectEqualSlices(u8, obj.get("formfeed").?.String, "\x0C");
- try testing.expectEqualSlices(u8, obj.get("backspace").?.String, "\x08");
- try testing.expectEqualSlices(u8, obj.get("doublequote").?.String, "\"");
- try testing.expectEqualSlices(u8, obj.get("unicode").?.String, "ą");
- try testing.expectEqualSlices(u8, obj.get("surrogatepair").?.String, "😂");
-}
-
-test "string copy option" {
- const input =
- \\{
- \\ "noescape": "aą😂",
- \\ "simple": "\\\/\n\r\t\f\b\"",
- \\ "unicode": "\u0105",
- \\ "surrogatepair": "\ud83d\ude02"
- \\}
- ;
-
- var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
- defer arena_allocator.deinit();
- const allocator = arena_allocator.allocator();
-
- var parser = Parser.init(allocator, false);
- const tree_nocopy = try parser.parse(input);
- const obj_nocopy = tree_nocopy.root.Object;
-
- parser = Parser.init(allocator, true);
- const tree_copy = try parser.parse(input);
- const obj_copy = tree_copy.root.Object;
-
- for ([_][]const u8{ "noescape", "simple", "unicode", "surrogatepair" }) |field_name| {
- try testing.expectEqualSlices(u8, obj_nocopy.get(field_name).?.String, obj_copy.get(field_name).?.String);
- }
-
- const nocopy_addr = &obj_nocopy.get("noescape").?.String[0];
- const copy_addr = &obj_copy.get("noescape").?.String[0];
-
- var found_nocopy = false;
- for (input, 0..) |_, index| {
- try testing.expect(copy_addr != &input[index]);
- if (nocopy_addr == &input[index]) {
- found_nocopy = true;
- }
- }
- try testing.expect(found_nocopy);
-}
-
-test "stringify alloc" {
- const allocator = std.testing.allocator;
- const expected =
- \\{"foo":"bar","answer":42,"my_friend":"sammy"}
- ;
- const actual = try stringifyAlloc(allocator, .{ .foo = "bar", .answer = 42, .my_friend = "sammy" }, .{});
- defer allocator.free(actual);
-
- try std.testing.expectEqualStrings(expected, actual);
-}
-
-test "json.serialize issue #5959" {
- var parser: StreamingParser = undefined;
- // StreamingParser has multiple internal fields set to undefined. This causes issues when using
- // expectEqual so these are zeroed. We are testing for equality here only because this is a
- // known small test reproduction which hits the relevant LLVM issue.
- @memset(@ptrCast([*]u8, &parser)[0..@sizeOf(StreamingParser)], 0);
- try std.testing.expectEqual(parser, parser);
-}
-
-fn checkNext(p: *TokenStream, id: std.meta.Tag(Token)) !void {
- const token = (p.next() catch unreachable).?;
- try testing.expect(std.meta.activeTag(token) == id);
-}
-
-test "json.token" {
- const s =
- \\{
- \\ "Image": {
- \\ "Width": 800,
- \\ "Height": 600,
- \\ "Title": "View from 15th Floor",
- \\ "Thumbnail": {
- \\ "Url": "http://www.example.com/image/481989943",
- \\ "Height": 125,
- \\ "Width": 100
- \\ },
- \\ "Animated" : false,
- \\ "IDs": [116, 943, 234, 38793]
- \\ }
- \\}
- ;
-
- var p = TokenStream.init(s);
-
- try checkNext(&p, .ObjectBegin);
- try checkNext(&p, .String); // Image
- try checkNext(&p, .ObjectBegin);
- try checkNext(&p, .String); // Width
- try checkNext(&p, .Number);
- try checkNext(&p, .String); // Height
- try checkNext(&p, .Number);
- try checkNext(&p, .String); // Title
- try checkNext(&p, .String);
- try checkNext(&p, .String); // Thumbnail
- try checkNext(&p, .ObjectBegin);
- try checkNext(&p, .String); // Url
- try checkNext(&p, .String);
- try checkNext(&p, .String); // Height
- try checkNext(&p, .Number);
- try checkNext(&p, .String); // Width
- try checkNext(&p, .Number);
- try checkNext(&p, .ObjectEnd);
- try checkNext(&p, .String); // Animated
- try checkNext(&p, .False);
- try checkNext(&p, .String); // IDs
- try checkNext(&p, .ArrayBegin);
- try checkNext(&p, .Number);
- try checkNext(&p, .Number);
- try checkNext(&p, .Number);
- try checkNext(&p, .Number);
- try checkNext(&p, .ArrayEnd);
- try checkNext(&p, .ObjectEnd);
- try checkNext(&p, .ObjectEnd);
-
- try testing.expect((try p.next()) == null);
-}
-
-test "json.token mismatched close" {
- var p = TokenStream.init("[102, 111, 111 }");
- try checkNext(&p, .ArrayBegin);
- try checkNext(&p, .Number);
- try checkNext(&p, .Number);
- try checkNext(&p, .Number);
- try testing.expectError(error.UnexpectedClosingBrace, p.next());
-}
-
-test "json.token premature object close" {
- var p = TokenStream.init("{ \"key\": }");
- try checkNext(&p, .ObjectBegin);
- try checkNext(&p, .String);
- try testing.expectError(error.InvalidValueBegin, p.next());
-}
-
-test "json.validate" {
- try testing.expectEqual(true, validate("{}"));
- try testing.expectEqual(true, validate("[]"));
- try testing.expectEqual(true, validate("[{[[[[{}]]]]}]"));
- try testing.expectEqual(false, validate("{]"));
- try testing.expectEqual(false, validate("[}"));
- try testing.expectEqual(false, validate("{{{{[]}}}]"));
-}
-
-test "Value.jsonStringify" {
- {
- var buffer: [10]u8 = undefined;
- var fbs = std.io.fixedBufferStream(&buffer);
- try @as(Value, .Null).jsonStringify(.{}, fbs.writer());
- try testing.expectEqualSlices(u8, fbs.getWritten(), "null");
- }
- {
- var buffer: [10]u8 = undefined;
- var fbs = std.io.fixedBufferStream(&buffer);
- try (Value{ .Bool = true }).jsonStringify(.{}, fbs.writer());
- try testing.expectEqualSlices(u8, fbs.getWritten(), "true");
- }
- {
- var buffer: [10]u8 = undefined;
- var fbs = std.io.fixedBufferStream(&buffer);
- try (Value{ .Integer = 42 }).jsonStringify(.{}, fbs.writer());
- try testing.expectEqualSlices(u8, fbs.getWritten(), "42");
- }
- {
- var buffer: [10]u8 = undefined;
- var fbs = std.io.fixedBufferStream(&buffer);
- try (Value{ .NumberString = "43" }).jsonStringify(.{}, fbs.writer());
- try testing.expectEqualSlices(u8, fbs.getWritten(), "43");
- }
- {
- var buffer: [10]u8 = undefined;
- var fbs = std.io.fixedBufferStream(&buffer);
- try (Value{ .Float = 42 }).jsonStringify(.{}, fbs.writer());
- try testing.expectEqualSlices(u8, fbs.getWritten(), "4.2e+01");
- }
- {
- var buffer: [10]u8 = undefined;
- var fbs = std.io.fixedBufferStream(&buffer);
- try (Value{ .String = "weeee" }).jsonStringify(.{}, fbs.writer());
- try testing.expectEqualSlices(u8, fbs.getWritten(), "\"weeee\"");
- }
- {
- var buffer: [10]u8 = undefined;
- var fbs = std.io.fixedBufferStream(&buffer);
- var vals = [_]Value{
- .{ .Integer = 1 },
- .{ .Integer = 2 },
- .{ .NumberString = "3" },
- };
- try (Value{
- .Array = Array.fromOwnedSlice(undefined, &vals),
- }).jsonStringify(.{}, fbs.writer());
- try testing.expectEqualSlices(u8, fbs.getWritten(), "[1,2,3]");
- }
- {
- var buffer: [10]u8 = undefined;
- var fbs = std.io.fixedBufferStream(&buffer);
- var obj = ObjectMap.init(testing.allocator);
- defer obj.deinit();
- try obj.putNoClobber("a", .{ .String = "b" });
- try (Value{ .Object = obj }).jsonStringify(.{}, fbs.writer());
- try testing.expectEqualSlices(u8, fbs.getWritten(), "{\"a\":\"b\"}");
- }
+ try err("\"\xf4\x90\x80\x80\"");
+ try err("\"\xf5\x80\x80\x80\"");
+ try err("\"\xf6\x80\x80\x80\"");
+ try err("\"\xf7\x80\x80\x80\"");
+ try err("\"\xf8\x80\x80\x80\"");
+ try err("\"\xf9\x80\x80\x80\"");
+ try err("\"\xfa\x80\x80\x80\"");
+ try err("\"\xfb\x80\x80\x80\"");
+ try err("\"\xfc\x80\x80\x80\"");
+ try err("\"\xfd\x80\x80\x80\"");
+ try err("\"\xfe\x80\x80\x80\"");
+ try err("\"\xff\x80\x80\x80\"");
}
lib/std/json/write_stream.zig
@@ -1,14 +1,19 @@
-const std = @import("../std.zig");
+const std = @import("std");
const assert = std.debug.assert;
const maxInt = std.math.maxInt;
+const StringifyOptions = @import("./stringify.zig").StringifyOptions;
+const jsonStringify = @import("./stringify.zig").stringify;
+
+const Value = @import("./dynamic.zig").Value;
+
const State = enum {
- Complete,
- Value,
- ArrayStart,
- Array,
- ObjectStart,
- Object,
+ complete,
+ value,
+ array_start,
+ array,
+ object_start,
+ object,
};
/// Writes JSON ([RFC8259](https://tools.ietf.org/html/rfc8259)) formatted data
@@ -21,9 +26,9 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
pub const Stream = OutStream;
- whitespace: std.json.StringifyOptions.Whitespace = std.json.StringifyOptions.Whitespace{
+ whitespace: StringifyOptions.Whitespace = StringifyOptions.Whitespace{
.indent_level = 0,
- .indent = .{ .Space = 1 },
+ .indent = .{ .space = 1 },
},
stream: OutStream,
@@ -36,38 +41,38 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
.state_index = 1,
.state = undefined,
};
- self.state[0] = .Complete;
- self.state[1] = .Value;
+ self.state[0] = .complete;
+ self.state[1] = .value;
return self;
}
pub fn beginArray(self: *Self) !void {
- assert(self.state[self.state_index] == State.Value); // need to call arrayElem or objectField
+ assert(self.state[self.state_index] == State.value); // need to call arrayElem or objectField
try self.stream.writeByte('[');
- self.state[self.state_index] = State.ArrayStart;
+ self.state[self.state_index] = State.array_start;
self.whitespace.indent_level += 1;
}
pub fn beginObject(self: *Self) !void {
- assert(self.state[self.state_index] == State.Value); // need to call arrayElem or objectField
+ assert(self.state[self.state_index] == State.value); // need to call arrayElem or objectField
try self.stream.writeByte('{');
- self.state[self.state_index] = State.ObjectStart;
+ self.state[self.state_index] = State.object_start;
self.whitespace.indent_level += 1;
}
pub fn arrayElem(self: *Self) !void {
const state = self.state[self.state_index];
switch (state) {
- .Complete => unreachable,
- .Value => unreachable,
- .ObjectStart => unreachable,
- .Object => unreachable,
- .Array, .ArrayStart => {
- if (state == .Array) {
+ .complete => unreachable,
+ .value => unreachable,
+ .object_start => unreachable,
+ .object => unreachable,
+ .array, .array_start => {
+ if (state == .array) {
try self.stream.writeByte(',');
}
- self.state[self.state_index] = .Array;
- self.pushState(.Value);
+ self.state[self.state_index] = .array;
+ self.pushState(.value);
try self.indent();
},
}
@@ -76,16 +81,16 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
pub fn objectField(self: *Self, name: []const u8) !void {
const state = self.state[self.state_index];
switch (state) {
- .Complete => unreachable,
- .Value => unreachable,
- .ArrayStart => unreachable,
- .Array => unreachable,
- .Object, .ObjectStart => {
- if (state == .Object) {
+ .complete => unreachable,
+ .value => unreachable,
+ .array_start => unreachable,
+ .array => unreachable,
+ .object, .object_start => {
+ if (state == .object) {
try self.stream.writeByte(',');
}
- self.state[self.state_index] = .Object;
- self.pushState(.Value);
+ self.state[self.state_index] = .object;
+ self.pushState(.value);
try self.indent();
try self.writeEscapedString(name);
try self.stream.writeByte(':');
@@ -98,16 +103,16 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
pub fn endArray(self: *Self) !void {
switch (self.state[self.state_index]) {
- .Complete => unreachable,
- .Value => unreachable,
- .ObjectStart => unreachable,
- .Object => unreachable,
- .ArrayStart => {
+ .complete => unreachable,
+ .value => unreachable,
+ .object_start => unreachable,
+ .object => unreachable,
+ .array_start => {
self.whitespace.indent_level -= 1;
try self.stream.writeByte(']');
self.popState();
},
- .Array => {
+ .array => {
self.whitespace.indent_level -= 1;
try self.indent();
self.popState();
@@ -118,16 +123,16 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
pub fn endObject(self: *Self) !void {
switch (self.state[self.state_index]) {
- .Complete => unreachable,
- .Value => unreachable,
- .ArrayStart => unreachable,
- .Array => unreachable,
- .ObjectStart => {
+ .complete => unreachable,
+ .value => unreachable,
+ .array_start => unreachable,
+ .array => unreachable,
+ .object_start => {
self.whitespace.indent_level -= 1;
try self.stream.writeByte('}');
self.popState();
},
- .Object => {
+ .object => {
self.whitespace.indent_level -= 1;
try self.indent();
self.popState();
@@ -137,13 +142,13 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
}
pub fn emitNull(self: *Self) !void {
- assert(self.state[self.state_index] == State.Value);
+ assert(self.state[self.state_index] == State.value);
try self.stringify(null);
self.popState();
}
pub fn emitBool(self: *Self, value: bool) !void {
- assert(self.state[self.state_index] == State.Value);
+ assert(self.state[self.state_index] == State.value);
try self.stringify(value);
self.popState();
}
@@ -154,7 +159,7 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
/// in a IEEE 754 double float, otherwise emitted as a string to the full precision.
value: anytype,
) !void {
- assert(self.state[self.state_index] == State.Value);
+ assert(self.state[self.state_index] == State.value);
switch (@typeInfo(@TypeOf(value))) {
.Int => |info| {
if (info.bits < 53) {
@@ -183,7 +188,7 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
}
pub fn emitString(self: *Self, string: []const u8) !void {
- assert(self.state[self.state_index] == State.Value);
+ assert(self.state[self.state_index] == State.value);
try self.writeEscapedString(string);
self.popState();
}
@@ -194,9 +199,9 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
}
/// Writes the complete json into the output stream
- pub fn emitJson(self: *Self, json: std.json.Value) Stream.Error!void {
- assert(self.state[self.state_index] == State.Value);
- try self.stringify(json);
+ pub fn emitJson(self: *Self, value: Value) Stream.Error!void {
+ assert(self.state[self.state_index] == State.value);
+ try self.stringify(value);
self.popState();
}
@@ -215,7 +220,7 @@ pub fn WriteStream(comptime OutStream: type, comptime max_depth: usize) type {
}
fn stringify(self: *Self, value: anytype) !void {
- try std.json.stringify(value, std.json.StringifyOptions{
+ try jsonStringify(value, StringifyOptions{
.whitespace = self.whitespace,
}, self.stream);
}
@@ -229,6 +234,8 @@ pub fn writeStream(
return WriteStream(@TypeOf(out_stream), max_depth).init(out_stream);
}
+const ObjectMap = @import("./dynamic.zig").ObjectMap;
+
test "json write stream" {
var out_buf: [1024]u8 = undefined;
var slice_stream = std.io.fixedBufferStream(&out_buf);
@@ -237,7 +244,7 @@ test "json write stream" {
var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
defer arena_allocator.deinit();
- var w = std.json.writeStream(out, 10);
+ var w = writeStream(out, 10);
try w.beginObject();
@@ -285,9 +292,9 @@ test "json write stream" {
try std.testing.expect(std.mem.eql(u8, expected, result));
}
-fn getJsonObject(allocator: std.mem.Allocator) !std.json.Value {
- var value = std.json.Value{ .Object = std.json.ObjectMap.init(allocator) };
- try value.Object.put("one", std.json.Value{ .Integer = @intCast(i64, 1) });
- try value.Object.put("two", std.json.Value{ .Float = 2.0 });
+fn getJsonObject(allocator: std.mem.Allocator) !Value {
+ var value = Value{ .object = ObjectMap.init(allocator) };
+ try value.object.put("one", Value{ .integer = @intCast(i64, 1) });
+ try value.object.put("two", Value{ .float = 2.0 });
return value;
}
lib/std/json.zig
@@ -1,2818 +1,59 @@
-// JSON parser conforming to RFC8259.
-//
-// https://tools.ietf.org/html/rfc8259
-
-const builtin = @import("builtin");
-const std = @import("std.zig");
-const debug = std.debug;
-const assert = debug.assert;
-const testing = std.testing;
-const mem = std.mem;
-const maxInt = std.math.maxInt;
+//! JSON parsing and stringification conforming to RFC 8259. https://datatracker.ietf.org/doc/html/rfc8259
+//!
+//! The low-level `Scanner` API reads from an input slice or successive slices of inputs,
+//! The `Reader` API connects a `std.io.Reader` to a `Scanner`.
+//!
+//! The high-level `parseFromSlice` and `parseFromTokenSource` deserializes a JSON document into a Zig type.
+//! The high-level `Parser` parses any JSON document into a dynamically typed `ValueTree` that has its own memory arena.
+//!
+//! The low-level `writeStream` emits syntax-conformant JSON tokens to a `std.io.Writer`.
+//! The high-level `stringify` serializes a Zig type into JSON.
+
+pub const ValueTree = @import("json/dynamic.zig").ValueTree;
+pub const ObjectMap = @import("json/dynamic.zig").ObjectMap;
+pub const Array = @import("json/dynamic.zig").Array;
+pub const Value = @import("json/dynamic.zig").Value;
+pub const Parser = @import("json/dynamic.zig").Parser;
+
+pub const validate = @import("json/scanner.zig").validate;
+pub const Error = @import("json/scanner.zig").Error;
+pub const reader = @import("json/scanner.zig").reader;
+pub const default_buffer_size = @import("json/scanner.zig").default_buffer_size;
+pub const Token = @import("json/scanner.zig").Token;
+pub const TokenType = @import("json/scanner.zig").TokenType;
+pub const Diagnostics = @import("json/scanner.zig").Diagnostics;
+pub const AllocWhen = @import("json/scanner.zig").AllocWhen;
+pub const default_max_value_len = @import("json/scanner.zig").default_max_value_len;
+pub const Reader = @import("json/scanner.zig").Reader;
+pub const Scanner = @import("json/scanner.zig").Scanner;
+pub const isNumberFormattedLikeAnInteger = @import("json/scanner.zig").isNumberFormattedLikeAnInteger;
+
+pub const ParseOptions = @import("json/static.zig").ParseOptions;
+pub const parseFromSlice = @import("json/static.zig").parseFromSlice;
+pub const parseFromTokenSource = @import("json/static.zig").parseFromTokenSource;
+pub const ParseError = @import("json/static.zig").ParseError;
+pub const parseFree = @import("json/static.zig").parseFree;
+
+pub const StringifyOptions = @import("json/stringify.zig").StringifyOptions;
+pub const encodeJsonString = @import("json/stringify.zig").encodeJsonString;
+pub const encodeJsonStringChars = @import("json/stringify.zig").encodeJsonStringChars;
+pub const stringify = @import("json/stringify.zig").stringify;
+pub const stringifyAlloc = @import("json/stringify.zig").stringifyAlloc;
pub const WriteStream = @import("json/write_stream.zig").WriteStream;
pub const writeStream = @import("json/write_stream.zig").writeStream;
-const StringEscapes = union(enum) {
- None,
-
- Some: struct {
- size_diff: isize,
- },
-};
-
-/// Checks to see if a string matches what it would be as a json-encoded string
-/// Assumes that `encoded` is a well-formed json string
-fn encodesTo(decoded: []const u8, encoded: []const u8) bool {
- var i: usize = 0;
- var j: usize = 0;
- while (i < decoded.len) {
- if (j >= encoded.len) return false;
- if (encoded[j] != '\\') {
- if (decoded[i] != encoded[j]) return false;
- j += 1;
- i += 1;
- } else {
- const escape_type = encoded[j + 1];
- if (escape_type != 'u') {
- const t: u8 = switch (escape_type) {
- '\\' => '\\',
- '/' => '/',
- 'n' => '\n',
- 'r' => '\r',
- 't' => '\t',
- 'f' => 12,
- 'b' => 8,
- '"' => '"',
- else => unreachable,
- };
- if (decoded[i] != t) return false;
- j += 2;
- i += 1;
- } else {
- var codepoint = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable;
- j += 6;
- if (codepoint >= 0xD800 and codepoint < 0xDC00) {
- // surrogate pair
- assert(encoded[j] == '\\');
- assert(encoded[j + 1] == 'u');
- const low_surrogate = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable;
- codepoint = 0x10000 + (((codepoint & 0x03ff) << 10) | (low_surrogate & 0x03ff));
- j += 6;
- }
- var buf: [4]u8 = undefined;
- const len = std.unicode.utf8Encode(codepoint, &buf) catch unreachable;
- if (i + len > decoded.len) return false;
- if (!mem.eql(u8, decoded[i..][0..len], buf[0..len])) return false;
- i += len;
- }
- }
- }
- assert(i == decoded.len);
- assert(j == encoded.len);
- return true;
-}
-
-/// A single token slice into the parent string.
-///
-/// Use `token.slice()` on the input at the current position to get the current slice.
-pub const Token = union(enum) {
- ObjectBegin,
- ObjectEnd,
- ArrayBegin,
- ArrayEnd,
- String: struct {
- /// How many bytes the token is.
- count: usize,
-
- /// Whether string contains an escape sequence and cannot be zero-copied
- escapes: StringEscapes,
-
- pub fn decodedLength(self: @This()) usize {
- return self.count +% switch (self.escapes) {
- .None => 0,
- .Some => |s| @bitCast(usize, s.size_diff),
- };
- }
-
- /// Slice into the underlying input string.
- pub fn slice(self: @This(), input: []const u8, i: usize) []const u8 {
- return input[i - self.count .. i];
- }
- },
- Number: struct {
- /// How many bytes the token is.
- count: usize,
-
- /// Whether number is simple and can be represented by an integer (i.e. no `.` or `e`)
- is_integer: bool,
-
- /// Slice into the underlying input string.
- pub fn slice(self: @This(), input: []const u8, i: usize) []const u8 {
- return input[i - self.count .. i];
- }
- },
- True,
- False,
- Null,
-};
-
-const AggregateContainerType = enum(u1) { object, array };
-
-// A LIFO bit-stack. Tracks which container-types have been entered during parse.
-fn AggregateContainerStack(comptime n: usize) type {
- return struct {
- const Self = @This();
-
- const element_bitcount = 8 * @sizeOf(usize);
- const element_count = n / element_bitcount;
- const ElementType = @Type(.{ .Int = .{ .signedness = .unsigned, .bits = element_bitcount } });
- const ElementShiftAmountType = std.math.Log2Int(ElementType);
-
- comptime {
- std.debug.assert(n % element_bitcount == 0);
- }
-
- memory: [element_count]ElementType,
- len: usize,
-
- pub fn init(self: *Self) void {
- self.memory = [_]ElementType{0} ** element_count;
- self.len = 0;
- }
-
- pub fn push(self: *Self, ty: AggregateContainerType) ?void {
- if (self.len >= n) {
- return null;
- }
-
- const index = self.len / element_bitcount;
- const sub_index = @intCast(ElementShiftAmountType, self.len % element_bitcount);
- const clear_mask = ~(@as(ElementType, 1) << sub_index);
- const set_bits = @as(ElementType, @enumToInt(ty)) << sub_index;
-
- self.memory[index] &= clear_mask;
- self.memory[index] |= set_bits;
- self.len += 1;
- }
-
- pub fn peek(self: *Self) ?AggregateContainerType {
- if (self.len == 0) {
- return null;
- }
-
- const bit_to_extract = self.len - 1;
- const index = bit_to_extract / element_bitcount;
- const sub_index = @intCast(ElementShiftAmountType, bit_to_extract % element_bitcount);
- const bit = @intCast(u1, (self.memory[index] >> sub_index) & 1);
- return @intToEnum(AggregateContainerType, bit);
- }
-
- pub fn pop(self: *Self) ?AggregateContainerType {
- if (self.peek()) |ty| {
- self.len -= 1;
- return ty;
- }
-
- return null;
- }
- };
-}
-
-/// A small streaming JSON parser. This accepts input one byte at a time and returns tokens as
-/// they are encountered. No copies or allocations are performed during parsing and the entire
-/// parsing state requires ~40-50 bytes of stack space.
-///
-/// Conforms strictly to RFC8259.
-///
-/// For a non-byte based wrapper, consider using TokenStream instead.
-pub const StreamingParser = struct {
- const default_max_nestings = 256;
-
- // Current state
- state: State,
- // How many bytes we have counted for the current token
- count: usize,
- // What state to follow after parsing a string (either property or value string)
- after_string_state: State,
- // What state to follow after parsing a value (either top-level or value end)
- after_value_state: State,
- // If we stopped now, would the complete parsed string to now be a valid json string
- complete: bool,
- // Current token flags to pass through to the next generated, see Token.
- string_escapes: StringEscapes,
- // When in .String states, was the previous character a high surrogate?
- string_last_was_high_surrogate: bool,
- // Used inside of StringEscapeHexUnicode* states
- string_unicode_codepoint: u21,
- // The first byte needs to be stored to validate 3- and 4-byte sequences.
- sequence_first_byte: u8 = undefined,
- // When in .Number states, is the number a (still) valid integer?
- number_is_integer: bool,
- // Bit-stack for nested object/map literals (max 256 nestings).
- stack: AggregateContainerStack(default_max_nestings),
-
- pub fn init() StreamingParser {
- var p: StreamingParser = undefined;
- p.reset();
- return p;
- }
-
- pub fn reset(p: *StreamingParser) void {
- p.state = .TopLevelBegin;
- p.count = 0;
- // Set before ever read in main transition function
- p.after_string_state = undefined;
- p.after_value_state = .ValueEnd; // handle end of values normally
- p.stack.init();
- p.complete = false;
- p.string_escapes = undefined;
- p.string_last_was_high_surrogate = undefined;
- p.string_unicode_codepoint = undefined;
- p.number_is_integer = undefined;
- }
-
- pub const State = enum(u8) {
- // These must be first with these explicit values as we rely on them for indexing the
- // bit-stack directly and avoiding a branch.
- ObjectSeparator = 0,
- ValueEnd = 1,
-
- TopLevelBegin,
- TopLevelEnd,
-
- ValueBegin,
- ValueBeginNoClosing,
-
- String,
- StringUtf8Byte2Of2,
- StringUtf8Byte2Of3,
- StringUtf8Byte3Of3,
- StringUtf8Byte2Of4,
- StringUtf8Byte3Of4,
- StringUtf8Byte4Of4,
- StringEscapeCharacter,
- StringEscapeHexUnicode4,
- StringEscapeHexUnicode3,
- StringEscapeHexUnicode2,
- StringEscapeHexUnicode1,
-
- Number,
- NumberMaybeDotOrExponent,
- NumberMaybeDigitOrDotOrExponent,
- NumberFractionalRequired,
- NumberFractional,
- NumberMaybeExponent,
- NumberExponent,
- NumberExponentDigitsRequired,
- NumberExponentDigits,
-
- TrueLiteral1,
- TrueLiteral2,
- TrueLiteral3,
-
- FalseLiteral1,
- FalseLiteral2,
- FalseLiteral3,
- FalseLiteral4,
-
- NullLiteral1,
- NullLiteral2,
- NullLiteral3,
-
- // Given an aggregate container type, return the state which should be entered after
- // processing a complete value type.
- pub fn fromAggregateContainerType(ty: AggregateContainerType) State {
- comptime {
- std.debug.assert(@enumToInt(AggregateContainerType.object) == @enumToInt(State.ObjectSeparator));
- std.debug.assert(@enumToInt(AggregateContainerType.array) == @enumToInt(State.ValueEnd));
- }
-
- return @intToEnum(State, @enumToInt(ty));
- }
- };
-
- pub const Error = error{
- InvalidTopLevel,
- TooManyNestedItems,
- TooManyClosingItems,
- InvalidValueBegin,
- InvalidValueEnd,
- UnbalancedBrackets,
- UnbalancedBraces,
- UnexpectedClosingBracket,
- UnexpectedClosingBrace,
- InvalidNumber,
- InvalidSeparator,
- InvalidLiteral,
- InvalidEscapeCharacter,
- InvalidUnicodeHexSymbol,
- InvalidUtf8Byte,
- InvalidTopLevelTrailing,
- InvalidControlCharacter,
- };
-
- /// Give another byte to the parser and obtain any new tokens. This may (rarely) return two
- /// tokens. token2 is always null if token1 is null.
- ///
- /// There is currently no error recovery on a bad stream.
- pub fn feed(p: *StreamingParser, c: u8, token1: *?Token, token2: *?Token) Error!void {
- token1.* = null;
- token2.* = null;
- p.count += 1;
-
- // unlikely
- if (try p.transition(c, token1)) {
- _ = try p.transition(c, token2);
- }
- }
-
- // Perform a single transition on the state machine and return any possible token.
- fn transition(p: *StreamingParser, c: u8, token: *?Token) Error!bool {
- switch (p.state) {
- .TopLevelBegin => switch (c) {
- '{' => {
- p.stack.push(.object) orelse return error.TooManyNestedItems;
- p.state = .ValueBegin;
- p.after_string_state = .ObjectSeparator;
-
- token.* = Token.ObjectBegin;
- },
- '[' => {
- p.stack.push(.array) orelse return error.TooManyNestedItems;
- p.state = .ValueBegin;
- p.after_string_state = .ValueEnd;
-
- token.* = Token.ArrayBegin;
- },
- '-' => {
- p.number_is_integer = true;
- p.state = .Number;
- p.after_value_state = .TopLevelEnd;
- p.count = 0;
- },
- '0' => {
- p.number_is_integer = true;
- p.state = .NumberMaybeDotOrExponent;
- p.after_value_state = .TopLevelEnd;
- p.count = 0;
- },
- '1'...'9' => {
- p.number_is_integer = true;
- p.state = .NumberMaybeDigitOrDotOrExponent;
- p.after_value_state = .TopLevelEnd;
- p.count = 0;
- },
- '"' => {
- p.state = .String;
- p.after_value_state = .TopLevelEnd;
- // We don't actually need the following since after_value_state should override.
- p.after_string_state = .ValueEnd;
- p.string_escapes = .None;
- p.string_last_was_high_surrogate = false;
- p.count = 0;
- },
- 't' => {
- p.state = .TrueLiteral1;
- p.after_value_state = .TopLevelEnd;
- p.count = 0;
- },
- 'f' => {
- p.state = .FalseLiteral1;
- p.after_value_state = .TopLevelEnd;
- p.count = 0;
- },
- 'n' => {
- p.state = .NullLiteral1;
- p.after_value_state = .TopLevelEnd;
- p.count = 0;
- },
- 0x09, 0x0A, 0x0D, 0x20 => {
- // whitespace
- },
- else => {
- return error.InvalidTopLevel;
- },
- },
-
- .TopLevelEnd => switch (c) {
- 0x09, 0x0A, 0x0D, 0x20 => {
- // whitespace
- },
- else => {
- return error.InvalidTopLevelTrailing;
- },
- },
-
- .ValueBegin => switch (c) {
- // NOTE: These are shared in ValueEnd as well, think we can reorder states to
- // be a bit clearer and avoid this duplication.
- '}' => {
- const last_type = p.stack.peek() orelse return error.TooManyClosingItems;
-
- if (last_type != .object) {
- return error.UnexpectedClosingBrace;
- }
-
- _ = p.stack.pop();
- p.state = .ValueBegin;
- p.after_string_state = State.fromAggregateContainerType(last_type);
-
- switch (p.stack.len) {
- 0 => {
- p.complete = true;
- p.state = .TopLevelEnd;
- },
- else => {
- p.state = .ValueEnd;
- },
- }
-
- token.* = Token.ObjectEnd;
- },
- ']' => {
- const last_type = p.stack.peek() orelse return error.TooManyClosingItems;
-
- if (last_type != .array) {
- return error.UnexpectedClosingBracket;
- }
-
- _ = p.stack.pop();
- p.state = .ValueBegin;
- p.after_string_state = State.fromAggregateContainerType(last_type);
-
- switch (p.stack.len) {
- 0 => {
- p.complete = true;
- p.state = .TopLevelEnd;
- },
- else => {
- p.state = .ValueEnd;
- },
- }
-
- token.* = Token.ArrayEnd;
- },
- '{' => {
- p.stack.push(.object) orelse return error.TooManyNestedItems;
-
- p.state = .ValueBegin;
- p.after_string_state = .ObjectSeparator;
-
- token.* = Token.ObjectBegin;
- },
- '[' => {
- p.stack.push(.array) orelse return error.TooManyNestedItems;
-
- p.state = .ValueBegin;
- p.after_string_state = .ValueEnd;
-
- token.* = Token.ArrayBegin;
- },
- '-' => {
- p.number_is_integer = true;
- p.state = .Number;
- p.count = 0;
- },
- '0' => {
- p.number_is_integer = true;
- p.state = .NumberMaybeDotOrExponent;
- p.count = 0;
- },
- '1'...'9' => {
- p.number_is_integer = true;
- p.state = .NumberMaybeDigitOrDotOrExponent;
- p.count = 0;
- },
- '"' => {
- p.state = .String;
- p.string_escapes = .None;
- p.string_last_was_high_surrogate = false;
- p.count = 0;
- },
- 't' => {
- p.state = .TrueLiteral1;
- p.count = 0;
- },
- 'f' => {
- p.state = .FalseLiteral1;
- p.count = 0;
- },
- 'n' => {
- p.state = .NullLiteral1;
- p.count = 0;
- },
- 0x09, 0x0A, 0x0D, 0x20 => {
- // whitespace
- },
- else => {
- return error.InvalidValueBegin;
- },
- },
-
- // TODO: A bit of duplication here and in the following state, redo.
- .ValueBeginNoClosing => switch (c) {
- '{' => {
- p.stack.push(.object) orelse return error.TooManyNestedItems;
-
- p.state = .ValueBegin;
- p.after_string_state = .ObjectSeparator;
-
- token.* = Token.ObjectBegin;
- },
- '[' => {
- p.stack.push(.array) orelse return error.TooManyNestedItems;
-
- p.state = .ValueBegin;
- p.after_string_state = .ValueEnd;
-
- token.* = Token.ArrayBegin;
- },
- '-' => {
- p.number_is_integer = true;
- p.state = .Number;
- p.count = 0;
- },
- '0' => {
- p.number_is_integer = true;
- p.state = .NumberMaybeDotOrExponent;
- p.count = 0;
- },
- '1'...'9' => {
- p.number_is_integer = true;
- p.state = .NumberMaybeDigitOrDotOrExponent;
- p.count = 0;
- },
- '"' => {
- p.state = .String;
- p.string_escapes = .None;
- p.string_last_was_high_surrogate = false;
- p.count = 0;
- },
- 't' => {
- p.state = .TrueLiteral1;
- p.count = 0;
- },
- 'f' => {
- p.state = .FalseLiteral1;
- p.count = 0;
- },
- 'n' => {
- p.state = .NullLiteral1;
- p.count = 0;
- },
- 0x09, 0x0A, 0x0D, 0x20 => {
- // whitespace
- },
- else => {
- return error.InvalidValueBegin;
- },
- },
-
- .ValueEnd => switch (c) {
- ',' => {
- const last_type = p.stack.peek() orelse unreachable;
- p.after_string_state = State.fromAggregateContainerType(last_type);
- p.state = .ValueBeginNoClosing;
- },
- ']' => {
- const last_type = p.stack.peek() orelse return error.TooManyClosingItems;
-
- if (last_type != .array) {
- return error.UnexpectedClosingBracket;
- }
-
- _ = p.stack.pop();
- p.state = .ValueEnd;
- p.after_string_state = State.fromAggregateContainerType(last_type);
-
- if (p.stack.len == 0) {
- p.complete = true;
- p.state = .TopLevelEnd;
- }
-
- token.* = Token.ArrayEnd;
- },
- '}' => {
- const last_type = p.stack.peek() orelse return error.TooManyClosingItems;
-
- if (last_type != .object) {
- return error.UnexpectedClosingBrace;
- }
-
- _ = p.stack.pop();
- p.state = .ValueEnd;
- p.after_string_state = State.fromAggregateContainerType(last_type);
-
- if (p.stack.len == 0) {
- p.complete = true;
- p.state = .TopLevelEnd;
- }
-
- token.* = Token.ObjectEnd;
- },
- 0x09, 0x0A, 0x0D, 0x20 => {
- // whitespace
- },
- else => {
- return error.InvalidValueEnd;
- },
- },
-
- .ObjectSeparator => switch (c) {
- ':' => {
- p.state = .ValueBeginNoClosing;
- p.after_string_state = .ValueEnd;
- },
- 0x09, 0x0A, 0x0D, 0x20 => {
- // whitespace
- },
- else => {
- return error.InvalidSeparator;
- },
- },
-
- .String => switch (c) {
- 0x00...0x1F => {
- return error.InvalidControlCharacter;
- },
- '"' => {
- p.state = p.after_string_state;
- if (p.after_value_state == .TopLevelEnd) {
- p.state = .TopLevelEnd;
- p.complete = true;
- }
-
- token.* = .{
- .String = .{
- .count = p.count - 1,
- .escapes = p.string_escapes,
- },
- };
- p.string_escapes = undefined;
- p.string_last_was_high_surrogate = undefined;
- },
- '\\' => {
- p.state = .StringEscapeCharacter;
- switch (p.string_escapes) {
- .None => {
- p.string_escapes = .{ .Some = .{ .size_diff = 0 } };
- },
- .Some => {},
- }
- },
- 0x20, 0x21, 0x23...0x5B, 0x5D...0x7F => {
- // non-control ascii
- p.string_last_was_high_surrogate = false;
- },
- 0xC2...0xDF => {
- p.state = .StringUtf8Byte2Of2;
- },
- 0xE0...0xEF => {
- p.state = .StringUtf8Byte2Of3;
- p.sequence_first_byte = c;
- },
- 0xF0...0xF4 => {
- p.state = .StringUtf8Byte2Of4;
- p.sequence_first_byte = c;
- },
- else => {
- return error.InvalidUtf8Byte;
- },
- },
-
- .StringUtf8Byte2Of2 => switch (c >> 6) {
- 0b10 => p.state = .String,
- else => return error.InvalidUtf8Byte,
- },
- .StringUtf8Byte2Of3 => {
- switch (p.sequence_first_byte) {
- 0xE0 => switch (c) {
- 0xA0...0xBF => {},
- else => return error.InvalidUtf8Byte,
- },
- 0xE1...0xEF => switch (c) {
- 0x80...0xBF => {},
- else => return error.InvalidUtf8Byte,
- },
- else => return error.InvalidUtf8Byte,
- }
- p.state = .StringUtf8Byte3Of3;
- },
- .StringUtf8Byte3Of3 => switch (c) {
- 0x80...0xBF => p.state = .String,
- else => return error.InvalidUtf8Byte,
- },
- .StringUtf8Byte2Of4 => {
- switch (p.sequence_first_byte) {
- 0xF0 => switch (c) {
- 0x90...0xBF => {},
- else => return error.InvalidUtf8Byte,
- },
- 0xF1...0xF3 => switch (c) {
- 0x80...0xBF => {},
- else => return error.InvalidUtf8Byte,
- },
- 0xF4 => switch (c) {
- 0x80...0x8F => {},
- else => return error.InvalidUtf8Byte,
- },
- else => return error.InvalidUtf8Byte,
- }
- p.state = .StringUtf8Byte3Of4;
- },
- .StringUtf8Byte3Of4 => switch (c) {
- 0x80...0xBF => p.state = .StringUtf8Byte4Of4,
- else => return error.InvalidUtf8Byte,
- },
- .StringUtf8Byte4Of4 => switch (c) {
- 0x80...0xBF => p.state = .String,
- else => return error.InvalidUtf8Byte,
- },
-
- .StringEscapeCharacter => switch (c) {
- // NOTE: '/' is allowed as an escaped character but it also is allowed
- // as unescaped according to the RFC. There is a reported errata which suggests
- // removing the non-escaped variant but it makes more sense to simply disallow
- // it as an escape code here.
- //
- // The current JSONTestSuite tests rely on both of this behaviour being present
- // however, so we default to the status quo where both are accepted until this
- // is further clarified.
- '"', '\\', '/', 'b', 'f', 'n', 'r', 't' => {
- p.string_escapes.Some.size_diff -= 1;
- p.state = .String;
- p.string_last_was_high_surrogate = false;
- },
- 'u' => {
- p.state = .StringEscapeHexUnicode4;
- },
- else => {
- return error.InvalidEscapeCharacter;
- },
- },
-
- .StringEscapeHexUnicode4 => {
- var codepoint: u21 = undefined;
- switch (c) {
- else => return error.InvalidUnicodeHexSymbol,
- '0'...'9' => {
- codepoint = c - '0';
- },
- 'A'...'F' => {
- codepoint = c - 'A' + 10;
- },
- 'a'...'f' => {
- codepoint = c - 'a' + 10;
- },
- }
- p.state = .StringEscapeHexUnicode3;
- p.string_unicode_codepoint = codepoint << 12;
- },
-
- .StringEscapeHexUnicode3 => {
- var codepoint: u21 = undefined;
- switch (c) {
- else => return error.InvalidUnicodeHexSymbol,
- '0'...'9' => {
- codepoint = c - '0';
- },
- 'A'...'F' => {
- codepoint = c - 'A' + 10;
- },
- 'a'...'f' => {
- codepoint = c - 'a' + 10;
- },
- }
- p.state = .StringEscapeHexUnicode2;
- p.string_unicode_codepoint |= codepoint << 8;
- },
-
- .StringEscapeHexUnicode2 => {
- var codepoint: u21 = undefined;
- switch (c) {
- else => return error.InvalidUnicodeHexSymbol,
- '0'...'9' => {
- codepoint = c - '0';
- },
- 'A'...'F' => {
- codepoint = c - 'A' + 10;
- },
- 'a'...'f' => {
- codepoint = c - 'a' + 10;
- },
- }
- p.state = .StringEscapeHexUnicode1;
- p.string_unicode_codepoint |= codepoint << 4;
- },
-
- .StringEscapeHexUnicode1 => {
- var codepoint: u21 = undefined;
- switch (c) {
- else => return error.InvalidUnicodeHexSymbol,
- '0'...'9' => {
- codepoint = c - '0';
- },
- 'A'...'F' => {
- codepoint = c - 'A' + 10;
- },
- 'a'...'f' => {
- codepoint = c - 'a' + 10;
- },
- }
- p.state = .String;
- p.string_unicode_codepoint |= codepoint;
- if (p.string_unicode_codepoint < 0xD800 or p.string_unicode_codepoint >= 0xE000) {
- // not part of surrogate pair
- p.string_escapes.Some.size_diff -= @as(isize, 6 - (std.unicode.utf8CodepointSequenceLength(p.string_unicode_codepoint) catch unreachable));
- p.string_last_was_high_surrogate = false;
- } else if (p.string_unicode_codepoint < 0xDC00) {
- // 'high' surrogate
- // takes 3 bytes to encode a half surrogate pair into wtf8
- p.string_escapes.Some.size_diff -= 6 - 3;
- p.string_last_was_high_surrogate = true;
- } else {
- // 'low' surrogate
- p.string_escapes.Some.size_diff -= 6;
- if (p.string_last_was_high_surrogate) {
- // takes 4 bytes to encode a full surrogate pair into utf8
- // 3 bytes are already reserved by high surrogate
- p.string_escapes.Some.size_diff -= -1;
- } else {
- // takes 3 bytes to encode a half surrogate pair into wtf8
- p.string_escapes.Some.size_diff -= -3;
- }
- p.string_last_was_high_surrogate = false;
- }
- p.string_unicode_codepoint = undefined;
- },
-
- .Number => {
- p.complete = p.after_value_state == .TopLevelEnd;
- switch (c) {
- '0' => {
- p.state = .NumberMaybeDotOrExponent;
- },
- '1'...'9' => {
- p.state = .NumberMaybeDigitOrDotOrExponent;
- },
- else => {
- return error.InvalidNumber;
- },
- }
- },
-
- .NumberMaybeDotOrExponent => {
- p.complete = p.after_value_state == .TopLevelEnd;
- switch (c) {
- '.' => {
- p.number_is_integer = false;
- p.state = .NumberFractionalRequired;
- },
- 'e', 'E' => {
- p.number_is_integer = false;
- p.state = .NumberExponent;
- },
- else => {
- p.state = p.after_value_state;
- token.* = .{
- .Number = .{
- .count = p.count,
- .is_integer = p.number_is_integer,
- },
- };
- p.number_is_integer = undefined;
- return true;
- },
- }
- },
-
- .NumberMaybeDigitOrDotOrExponent => {
- p.complete = p.after_value_state == .TopLevelEnd;
- switch (c) {
- '.' => {
- p.number_is_integer = false;
- p.state = .NumberFractionalRequired;
- },
- 'e', 'E' => {
- p.number_is_integer = false;
- p.state = .NumberExponent;
- },
- '0'...'9' => {
- // another digit
- },
- else => {
- p.state = p.after_value_state;
- token.* = .{
- .Number = .{
- .count = p.count,
- .is_integer = p.number_is_integer,
- },
- };
- return true;
- },
- }
- },
-
- .NumberFractionalRequired => {
- p.complete = p.after_value_state == .TopLevelEnd;
- switch (c) {
- '0'...'9' => {
- p.state = .NumberFractional;
- },
- else => {
- return error.InvalidNumber;
- },
- }
- },
-
- .NumberFractional => {
- p.complete = p.after_value_state == .TopLevelEnd;
- switch (c) {
- '0'...'9' => {
- // another digit
- },
- 'e', 'E' => {
- p.number_is_integer = false;
- p.state = .NumberExponent;
- },
- else => {
- p.state = p.after_value_state;
- token.* = .{
- .Number = .{
- .count = p.count,
- .is_integer = p.number_is_integer,
- },
- };
- return true;
- },
- }
- },
-
- .NumberMaybeExponent => {
- p.complete = p.after_value_state == .TopLevelEnd;
- switch (c) {
- 'e', 'E' => {
- p.number_is_integer = false;
- p.state = .NumberExponent;
- },
- else => {
- p.state = p.after_value_state;
- token.* = .{
- .Number = .{
- .count = p.count,
- .is_integer = p.number_is_integer,
- },
- };
- return true;
- },
- }
- },
-
- .NumberExponent => switch (c) {
- '-', '+' => {
- p.complete = false;
- p.state = .NumberExponentDigitsRequired;
- },
- '0'...'9' => {
- p.complete = p.after_value_state == .TopLevelEnd;
- p.state = .NumberExponentDigits;
- },
- else => {
- return error.InvalidNumber;
- },
- },
-
- .NumberExponentDigitsRequired => switch (c) {
- '0'...'9' => {
- p.complete = p.after_value_state == .TopLevelEnd;
- p.state = .NumberExponentDigits;
- },
- else => {
- return error.InvalidNumber;
- },
- },
-
- .NumberExponentDigits => {
- p.complete = p.after_value_state == .TopLevelEnd;
- switch (c) {
- '0'...'9' => {
- // another digit
- },
- else => {
- p.state = p.after_value_state;
- token.* = .{
- .Number = .{
- .count = p.count,
- .is_integer = p.number_is_integer,
- },
- };
- return true;
- },
- }
- },
-
- .TrueLiteral1 => switch (c) {
- 'r' => p.state = .TrueLiteral2,
- else => return error.InvalidLiteral,
- },
-
- .TrueLiteral2 => switch (c) {
- 'u' => p.state = .TrueLiteral3,
- else => return error.InvalidLiteral,
- },
-
- .TrueLiteral3 => switch (c) {
- 'e' => {
- p.state = p.after_value_state;
- p.complete = p.state == .TopLevelEnd;
- token.* = Token.True;
- },
- else => {
- return error.InvalidLiteral;
- },
- },
-
- .FalseLiteral1 => switch (c) {
- 'a' => p.state = .FalseLiteral2,
- else => return error.InvalidLiteral,
- },
-
- .FalseLiteral2 => switch (c) {
- 'l' => p.state = .FalseLiteral3,
- else => return error.InvalidLiteral,
- },
-
- .FalseLiteral3 => switch (c) {
- 's' => p.state = .FalseLiteral4,
- else => return error.InvalidLiteral,
- },
-
- .FalseLiteral4 => switch (c) {
- 'e' => {
- p.state = p.after_value_state;
- p.complete = p.state == .TopLevelEnd;
- token.* = Token.False;
- },
- else => {
- return error.InvalidLiteral;
- },
- },
-
- .NullLiteral1 => switch (c) {
- 'u' => p.state = .NullLiteral2,
- else => return error.InvalidLiteral,
- },
-
- .NullLiteral2 => switch (c) {
- 'l' => p.state = .NullLiteral3,
- else => return error.InvalidLiteral,
- },
-
- .NullLiteral3 => switch (c) {
- 'l' => {
- p.state = p.after_value_state;
- p.complete = p.state == .TopLevelEnd;
- token.* = Token.Null;
- },
- else => {
- return error.InvalidLiteral;
- },
- },
- }
-
- return false;
- }
-};
-
-/// A small wrapper over a StreamingParser for full slices. Returns a stream of json Tokens.
-pub const TokenStream = struct {
- i: usize,
- slice: []const u8,
- parser: StreamingParser,
- token: ?Token,
-
- pub const Error = StreamingParser.Error || error{UnexpectedEndOfJson};
-
- pub fn init(slice: []const u8) TokenStream {
- return TokenStream{
- .i = 0,
- .slice = slice,
- .parser = StreamingParser.init(),
- .token = null,
- };
- }
-
- fn stackUsed(self: *TokenStream) usize {
- return self.parser.stack.len + if (self.token != null) @as(usize, 1) else 0;
- }
-
- pub fn next(self: *TokenStream) Error!?Token {
- if (self.token) |token| {
- self.token = null;
- return token;
- }
-
- var t1: ?Token = undefined;
- var t2: ?Token = undefined;
-
- while (self.i < self.slice.len) {
- try self.parser.feed(self.slice[self.i], &t1, &t2);
- self.i += 1;
-
- if (t1) |token| {
- self.token = t2;
- return token;
- }
- }
-
- // Without this a bare number fails, the streaming parser doesn't know the input ended
- try self.parser.feed(' ', &t1, &t2);
- self.i += 1;
-
- if (t1) |token| {
- return token;
- } else if (self.parser.complete) {
- return null;
- } else {
- return error.UnexpectedEndOfJson;
- }
- }
-};
-
-/// Validate a JSON string. This does not limit number precision so a decoder may not necessarily
-/// be able to decode the string even if this returns true.
-pub fn validate(s: []const u8) bool {
- var p = StreamingParser.init();
-
- for (s) |c| {
- var token1: ?Token = undefined;
- var token2: ?Token = undefined;
-
- p.feed(c, &token1, &token2) catch {
- return false;
- };
- }
-
- return p.complete;
-}
-
-const Allocator = std.mem.Allocator;
-const ArenaAllocator = std.heap.ArenaAllocator;
-const ArrayList = std.ArrayList;
-const StringArrayHashMap = std.StringArrayHashMap;
-
-pub const ValueTree = struct {
- arena: *ArenaAllocator,
- root: Value,
-
- pub fn deinit(self: *ValueTree) void {
- self.arena.deinit();
- self.arena.child_allocator.destroy(self.arena);
- }
-};
-
-pub const ObjectMap = StringArrayHashMap(Value);
-pub const Array = ArrayList(Value);
-
-/// Represents a JSON value
-/// Currently only supports numbers that fit into i64 or f64.
-pub const Value = union(enum) {
- Null,
- Bool: bool,
- Integer: i64,
- Float: f64,
- NumberString: []const u8,
- String: []const u8,
- Array: Array,
- Object: ObjectMap,
-
- pub fn jsonStringify(
- value: @This(),
- options: StringifyOptions,
- out_stream: anytype,
- ) @TypeOf(out_stream).Error!void {
- switch (value) {
- .Null => try stringify(null, options, out_stream),
- .Bool => |inner| try stringify(inner, options, out_stream),
- .Integer => |inner| try stringify(inner, options, out_stream),
- .Float => |inner| try stringify(inner, options, out_stream),
- .NumberString => |inner| try out_stream.writeAll(inner),
- .String => |inner| try stringify(inner, options, out_stream),
- .Array => |inner| try stringify(inner.items, options, out_stream),
- .Object => |inner| {
- try out_stream.writeByte('{');
- var field_output = false;
- var child_options = options;
- if (child_options.whitespace) |*child_whitespace| {
- child_whitespace.indent_level += 1;
- }
- var it = inner.iterator();
- while (it.next()) |entry| {
- if (!field_output) {
- field_output = true;
- } else {
- try out_stream.writeByte(',');
- }
- if (child_options.whitespace) |child_whitespace| {
- try child_whitespace.outputIndent(out_stream);
- }
-
- try stringify(entry.key_ptr.*, options, out_stream);
- try out_stream.writeByte(':');
- if (child_options.whitespace) |child_whitespace| {
- if (child_whitespace.separator) {
- try out_stream.writeByte(' ');
- }
- }
- try stringify(entry.value_ptr.*, child_options, out_stream);
- }
- if (field_output) {
- if (options.whitespace) |whitespace| {
- try whitespace.outputIndent(out_stream);
- }
- }
- try out_stream.writeByte('}');
- },
- }
- }
-
- pub fn dump(self: Value) void {
- std.debug.getStderrMutex().lock();
- defer std.debug.getStderrMutex().unlock();
-
- const stderr = std.io.getStdErr().writer();
- std.json.stringify(self, std.json.StringifyOptions{ .whitespace = null }, stderr) catch return;
- }
-};
-
-/// parse tokens from a stream, returning `false` if they do not decode to `value`
-fn parsesTo(comptime T: type, value: T, tokens: *TokenStream, options: ParseOptions) !bool {
- // TODO: should be able to write this function to not require an allocator
- const tmp = try parse(T, tokens, options);
- defer parseFree(T, tmp, options);
-
- return parsedEqual(tmp, value);
-}
-
-/// Returns if a value returned by `parse` is deep-equal to another value
-fn parsedEqual(a: anytype, b: @TypeOf(a)) bool {
- switch (@typeInfo(@TypeOf(a))) {
- .Optional => {
- if (a == null and b == null) return true;
- if (a == null or b == null) return false;
- return parsedEqual(a.?, b.?);
- },
- .Union => |info| {
- if (info.tag_type) |UnionTag| {
- const tag_a = std.meta.activeTag(a);
- const tag_b = std.meta.activeTag(b);
- if (tag_a != tag_b) return false;
-
- inline for (info.fields) |field_info| {
- if (@field(UnionTag, field_info.name) == tag_a) {
- return parsedEqual(@field(a, field_info.name), @field(b, field_info.name));
- }
- }
- return false;
- } else {
- unreachable;
- }
- },
- .Array => {
- for (a, 0..) |e, i|
- if (!parsedEqual(e, b[i])) return false;
- return true;
- },
- .Struct => |info| {
- inline for (info.fields) |field_info| {
- if (!parsedEqual(@field(a, field_info.name), @field(b, field_info.name))) return false;
- }
- return true;
- },
- .Pointer => |ptrInfo| switch (ptrInfo.size) {
- .One => return parsedEqual(a.*, b.*),
- .Slice => {
- if (a.len != b.len) return false;
- for (a, 0..) |e, i|
- if (!parsedEqual(e, b[i])) return false;
- return true;
- },
- .Many, .C => unreachable,
- },
- else => return a == b,
- }
- unreachable;
-}
-
-pub const ParseOptions = struct {
- allocator: ?Allocator = null,
-
- /// Behaviour when a duplicate field is encountered.
- duplicate_field_behavior: enum {
- UseFirst,
- Error,
- UseLast,
- } = .Error,
-
- /// If false, finding an unknown field returns an error.
- ignore_unknown_fields: bool = false,
-
- allow_trailing_data: bool = false,
-};
-
-const SkipValueError = error{UnexpectedJsonDepth} || TokenStream.Error;
-
-fn skipValue(tokens: *TokenStream) SkipValueError!void {
- const original_depth = tokens.stackUsed();
-
- // Return an error if no value is found
- _ = try tokens.next();
- if (tokens.stackUsed() < original_depth) return error.UnexpectedJsonDepth;
- if (tokens.stackUsed() == original_depth) return;
-
- while (try tokens.next()) |_| {
- if (tokens.stackUsed() == original_depth) return;
- }
-}
-
-fn ParseInternalError(comptime T: type) type {
- // `inferred_types` is used to avoid infinite recursion for recursive type definitions.
- const inferred_types = [_]type{};
- return ParseInternalErrorImpl(T, &inferred_types);
-}
-
-fn ParseInternalErrorImpl(comptime T: type, comptime inferred_types: []const type) type {
- for (inferred_types) |ty| {
- if (T == ty) return error{};
- }
-
- switch (@typeInfo(T)) {
- .Bool => return error{UnexpectedToken},
- .Float, .ComptimeFloat => return error{UnexpectedToken} || std.fmt.ParseFloatError,
- .Int, .ComptimeInt => {
- return error{ UnexpectedToken, InvalidNumber, Overflow } ||
- std.fmt.ParseIntError || std.fmt.ParseFloatError;
- },
- .Optional => |optionalInfo| {
- return ParseInternalErrorImpl(optionalInfo.child, inferred_types ++ [_]type{T});
- },
- .Enum => return error{ UnexpectedToken, InvalidEnumTag } || std.fmt.ParseIntError ||
- std.meta.IntToEnumError || std.meta.IntToEnumError,
- .Union => |unionInfo| {
- if (unionInfo.tag_type) |_| {
- var errors = error{NoUnionMembersMatched};
- for (unionInfo.fields) |u_field| {
- errors = errors || ParseInternalErrorImpl(u_field.type, inferred_types ++ [_]type{T});
- }
- return errors;
- } else {
- @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");
- }
- },
- .Struct => |structInfo| {
- var errors = error{
- DuplicateJSONField,
- UnexpectedEndOfJson,
- UnexpectedToken,
- UnexpectedValue,
- UnknownField,
- MissingField,
- } || SkipValueError || TokenStream.Error;
- for (structInfo.fields) |field| {
- errors = errors || ParseInternalErrorImpl(field.type, inferred_types ++ [_]type{T});
- }
- return errors;
- },
- .Array => |arrayInfo| {
- return error{ UnexpectedEndOfJson, UnexpectedToken, LengthMismatch } || TokenStream.Error ||
- UnescapeValidStringError ||
- ParseInternalErrorImpl(arrayInfo.child, inferred_types ++ [_]type{T});
- },
- .Vector => |vecInfo| {
- return error{ UnexpectedEndOfJson, UnexpectedToken, LengthMismatch } || TokenStream.Error ||
- UnescapeValidStringError ||
- ParseInternalErrorImpl(vecInfo.child, inferred_types ++ [_]type{T});
- },
- .Pointer => |ptrInfo| {
- var errors = error{AllocatorRequired} || std.mem.Allocator.Error;
- switch (ptrInfo.size) {
- .One => {
- return errors || ParseInternalErrorImpl(ptrInfo.child, inferred_types ++ [_]type{T});
- },
- .Slice => {
- return errors || error{ UnexpectedEndOfJson, UnexpectedToken } ||
- ParseInternalErrorImpl(ptrInfo.child, inferred_types ++ [_]type{T}) ||
- UnescapeValidStringError || TokenStream.Error;
- },
- else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
- }
- },
- else => return error{},
- }
- unreachable;
-}
-
-fn parseInternalArray(
- comptime T: type,
- comptime Elt: type,
- comptime arr_len: usize,
- tokens: *TokenStream,
- options: ParseOptions,
-) ParseInternalError(T)!T {
- var r: T = undefined;
- var i: usize = 0;
- var child_options = options;
- child_options.allow_trailing_data = true;
- errdefer {
- // Without the r.len check `r[i]` is not allowed
- if (arr_len > 0) while (true) : (i -= 1) {
- parseFree(Elt, r[i], options);
- if (i == 0) break;
- };
- }
- if (arr_len > 0) while (i < arr_len) : (i += 1) {
- r[i] = try parse(Elt, tokens, child_options);
- };
- const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
- switch (tok) {
- .ArrayEnd => {},
- else => return error.UnexpectedToken,
- }
- return r;
-}
-
-fn parseInternal(
- comptime T: type,
- token: Token,
- tokens: *TokenStream,
- options: ParseOptions,
-) ParseInternalError(T)!T {
- switch (@typeInfo(T)) {
- .Bool => {
- return switch (token) {
- .True => true,
- .False => false,
- else => error.UnexpectedToken,
- };
- },
- .Float, .ComptimeFloat => {
- switch (token) {
- .Number => |numberToken| return try std.fmt.parseFloat(T, numberToken.slice(tokens.slice, tokens.i - 1)),
- .String => |stringToken| return try std.fmt.parseFloat(T, stringToken.slice(tokens.slice, tokens.i - 1)),
- else => return error.UnexpectedToken,
- }
- },
- .Int, .ComptimeInt => {
- switch (token) {
- .Number => |numberToken| {
- if (numberToken.is_integer)
- return try std.fmt.parseInt(T, numberToken.slice(tokens.slice, tokens.i - 1), 10);
- const float = try std.fmt.parseFloat(f128, numberToken.slice(tokens.slice, tokens.i - 1));
- if (@round(float) != float) return error.InvalidNumber;
- if (float > std.math.maxInt(T) or float < std.math.minInt(T)) return error.Overflow;
- return @floatToInt(T, float);
- },
- .String => |stringToken| {
- return std.fmt.parseInt(T, stringToken.slice(tokens.slice, tokens.i - 1), 10) catch |err| {
- switch (err) {
- error.Overflow => return err,
- error.InvalidCharacter => {
- const float = try std.fmt.parseFloat(f128, stringToken.slice(tokens.slice, tokens.i - 1));
- if (@round(float) != float) return error.InvalidNumber;
- if (float > std.math.maxInt(T) or float < std.math.minInt(T)) return error.Overflow;
- return @floatToInt(T, float);
- },
- }
- };
- },
- else => return error.UnexpectedToken,
- }
- },
- .Optional => |optionalInfo| {
- if (token == .Null) {
- return null;
- } else {
- return try parseInternal(optionalInfo.child, token, tokens, options);
- }
- },
- .Enum => |enumInfo| {
- switch (token) {
- .Number => |numberToken| {
- if (!numberToken.is_integer) return error.UnexpectedToken;
- const n = try std.fmt.parseInt(enumInfo.tag_type, numberToken.slice(tokens.slice, tokens.i - 1), 10);
- return try std.meta.intToEnum(T, n);
- },
- .String => |stringToken| {
- const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
- switch (stringToken.escapes) {
- .None => return std.meta.stringToEnum(T, source_slice) orelse return error.InvalidEnumTag,
- .Some => {
- inline for (enumInfo.fields) |field| {
- if (field.name.len == stringToken.decodedLength() and encodesTo(field.name, source_slice)) {
- return @field(T, field.name);
- }
- }
- return error.InvalidEnumTag;
- },
- }
- },
- else => return error.UnexpectedToken,
- }
- },
- .Union => |unionInfo| {
- if (unionInfo.tag_type) |_| {
- // try each of the union fields until we find one that matches
- inline for (unionInfo.fields) |u_field| {
- // take a copy of tokens so we can withhold mutations until success
- var tokens_copy = tokens.*;
- if (parseInternal(u_field.type, token, &tokens_copy, options)) |value| {
- tokens.* = tokens_copy;
- return @unionInit(T, u_field.name, value);
- } else |err| {
- // Bubble up error.OutOfMemory
- // Parsing some types won't have OutOfMemory in their
- // error-sets, for the condition to be valid, merge it in.
- if (@as(@TypeOf(err) || error{OutOfMemory}, err) == error.OutOfMemory) return err;
- // Bubble up AllocatorRequired, as it indicates missing option
- if (@as(@TypeOf(err) || error{AllocatorRequired}, err) == error.AllocatorRequired) return err;
- // otherwise continue through the `inline for`
- }
- }
- return error.NoUnionMembersMatched;
- } else {
- @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");
- }
- },
- .Struct => |structInfo| {
- if (structInfo.is_tuple) {
- switch (token) {
- .ArrayBegin => {},
- else => return error.UnexpectedToken,
- }
- var r: T = undefined;
- var child_options = options;
- child_options.allow_trailing_data = true;
- var fields_seen: usize = 0;
- errdefer {
- inline for (0..structInfo.fields.len) |i| {
- if (i < fields_seen) {
- parseFree(structInfo.fields[i].type, r[i], options);
- }
- }
- }
- inline for (0..structInfo.fields.len) |i| {
- r[i] = try parse(structInfo.fields[i].type, tokens, child_options);
- fields_seen = i + 1;
- }
- const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
- switch (tok) {
- .ArrayEnd => {},
- else => return error.UnexpectedToken,
- }
- return r;
- }
-
- switch (token) {
- .ObjectBegin => {},
- else => return error.UnexpectedToken,
- }
- var r: T = undefined;
- var fields_seen = [_]bool{false} ** structInfo.fields.len;
- errdefer {
- inline for (structInfo.fields, 0..) |field, i| {
- if (fields_seen[i] and !field.is_comptime) {
- parseFree(field.type, @field(r, field.name), options);
- }
- }
- }
-
- while (true) {
- switch ((try tokens.next()) orelse return error.UnexpectedEndOfJson) {
- .ObjectEnd => break,
- .String => |stringToken| {
- const key_source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
- var child_options = options;
- child_options.allow_trailing_data = true;
- var found = false;
- inline for (structInfo.fields, 0..) |field, i| {
- if (switch (stringToken.escapes) {
- .None => mem.eql(u8, field.name, key_source_slice),
- .Some => (field.name.len == stringToken.decodedLength() and encodesTo(field.name, key_source_slice)),
- }) {
- if (fields_seen[i]) {
- switch (options.duplicate_field_behavior) {
- .UseFirst => {
- // unconditionally ignore value. for comptime fields, this skips check against default_value
- parseFree(field.type, try parse(field.type, tokens, child_options), child_options);
- found = true;
- break;
- },
- .Error => return error.DuplicateJSONField,
- .UseLast => {
- if (!field.is_comptime) {
- parseFree(field.type, @field(r, field.name), child_options);
- }
- fields_seen[i] = false;
- },
- }
- }
- if (field.is_comptime) {
- if (!try parsesTo(field.type, @ptrCast(*align(1) const field.type, field.default_value.?).*, tokens, child_options)) {
- return error.UnexpectedValue;
- }
- } else {
- @field(r, field.name) = try parse(field.type, tokens, child_options);
- }
- fields_seen[i] = true;
- found = true;
- break;
- }
- }
- if (!found) {
- if (options.ignore_unknown_fields) {
- try skipValue(tokens);
- continue;
- } else {
- return error.UnknownField;
- }
- }
- },
- else => return error.UnexpectedToken,
- }
- }
- inline for (structInfo.fields, 0..) |field, i| {
- if (!fields_seen[i]) {
- if (field.default_value) |default_ptr| {
- if (!field.is_comptime) {
- const default = @ptrCast(*align(1) const field.type, default_ptr).*;
- @field(r, field.name) = default;
- }
- } else {
- return error.MissingField;
- }
- }
- }
- return r;
- },
- .Array => |arrayInfo| {
- switch (token) {
- .ArrayBegin => {
- const len = @typeInfo(T).Array.len;
- return parseInternalArray(T, arrayInfo.child, len, tokens, options);
- },
- .String => |stringToken| {
- if (arrayInfo.child != u8) return error.UnexpectedToken;
- var r: T = undefined;
- const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
- if (r.len != stringToken.decodedLength()) return error.LengthMismatch;
- switch (stringToken.escapes) {
- .None => @memcpy(r[0..source_slice.len], source_slice),
- .Some => try unescapeValidString(&r, source_slice),
- }
- return r;
- },
- else => return error.UnexpectedToken,
- }
- },
- .Vector => |vecInfo| {
- switch (token) {
- .ArrayBegin => {
- const len = @typeInfo(T).Vector.len;
- return parseInternalArray(T, vecInfo.child, len, tokens, options);
- },
- else => return error.UnexpectedToken,
- }
- },
- .Pointer => |ptrInfo| {
- const allocator = options.allocator orelse return error.AllocatorRequired;
- switch (ptrInfo.size) {
- .One => {
- const r: *ptrInfo.child = try allocator.create(ptrInfo.child);
- errdefer allocator.destroy(r);
- r.* = try parseInternal(ptrInfo.child, token, tokens, options);
- return r;
- },
- .Slice => {
- switch (token) {
- .ArrayBegin => {
- var arraylist = std.ArrayList(ptrInfo.child).init(allocator);
- errdefer {
- while (arraylist.popOrNull()) |v| {
- parseFree(ptrInfo.child, v, options);
- }
- arraylist.deinit();
- }
-
- while (true) {
- const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
- switch (tok) {
- .ArrayEnd => break,
- else => {},
- }
-
- try arraylist.ensureUnusedCapacity(1);
- const v = try parseInternal(ptrInfo.child, tok, tokens, options);
- arraylist.appendAssumeCapacity(v);
- }
-
- if (ptrInfo.sentinel) |some| {
- const sentinel_value = @ptrCast(*align(1) const ptrInfo.child, some).*;
- return try arraylist.toOwnedSliceSentinel(sentinel_value);
- }
-
- return try arraylist.toOwnedSlice();
- },
- .String => |stringToken| {
- if (ptrInfo.child != u8) return error.UnexpectedToken;
- const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
- const len = stringToken.decodedLength();
- const output = if (ptrInfo.sentinel) |sentinel_ptr|
- try allocator.allocSentinel(u8, len, @ptrCast(*const u8, sentinel_ptr).*)
- else
- try allocator.alloc(u8, len);
- errdefer allocator.free(output);
- switch (stringToken.escapes) {
- .None => @memcpy(output[0..source_slice.len], source_slice),
- .Some => try unescapeValidString(output, source_slice),
- }
-
- return output;
- },
- else => return error.UnexpectedToken,
- }
- },
- else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
- }
- },
- else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
- }
- unreachable;
-}
-
-pub fn ParseError(comptime T: type) type {
- return ParseInternalError(T) || error{UnexpectedEndOfJson} || TokenStream.Error;
-}
-
-pub fn parse(comptime T: type, tokens: *TokenStream, options: ParseOptions) ParseError(T)!T {
- const token = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
- const r = try parseInternal(T, token, tokens, options);
- errdefer parseFree(T, r, options);
- if (!options.allow_trailing_data) {
- if ((try tokens.next()) != null) unreachable;
- assert(tokens.i >= tokens.slice.len);
- }
- return r;
-}
-
-/// Releases resources created by `parse`.
-/// Should be called with the same type and `ParseOptions` that were passed to `parse`
-pub fn parseFree(comptime T: type, value: T, options: ParseOptions) void {
- switch (@typeInfo(T)) {
- .Bool, .Float, .ComptimeFloat, .Int, .ComptimeInt, .Enum => {},
- .Optional => {
- if (value) |v| {
- return parseFree(@TypeOf(v), v, options);
- }
- },
- .Union => |unionInfo| {
- if (unionInfo.tag_type) |UnionTagType| {
- inline for (unionInfo.fields) |u_field| {
- if (value == @field(UnionTagType, u_field.name)) {
- parseFree(u_field.type, @field(value, u_field.name), options);
- break;
- }
- }
- } else {
- unreachable;
- }
- },
- .Struct => |structInfo| {
- inline for (structInfo.fields) |field| {
- if (!field.is_comptime) {
- var should_free = true;
- if (field.default_value) |default| {
- switch (@typeInfo(field.type)) {
- // We must not attempt to free pointers to struct default values
- .Pointer => |fieldPtrInfo| {
- const field_value = @field(value, field.name);
- const field_ptr = switch (fieldPtrInfo.size) {
- .One => field_value,
- .Slice => field_value.ptr,
- else => unreachable, // Other pointer types are not parseable
- };
- const field_addr = @ptrToInt(field_ptr);
-
- const casted_default = @ptrCast(*const field.type, @alignCast(@alignOf(field.type), default)).*;
- const default_ptr = switch (fieldPtrInfo.size) {
- .One => casted_default,
- .Slice => casted_default.ptr,
- else => unreachable, // Other pointer types are not parseable
- };
- const default_addr = @ptrToInt(default_ptr);
-
- if (field_addr == default_addr) {
- should_free = false;
- }
- },
- else => {},
- }
- }
- if (should_free) {
- parseFree(field.type, @field(value, field.name), options);
- }
- }
- }
- },
- .Array => |arrayInfo| {
- for (value) |v| {
- parseFree(arrayInfo.child, v, options);
- }
- },
- .Vector => |vecInfo| {
- var i: usize = 0;
- var v_len: usize = @typeInfo(@TypeOf(value)).Vector.len;
- while (i < v_len) : (i += 1) {
- parseFree(vecInfo.child, value[i], options);
- }
- },
- .Pointer => |ptrInfo| {
- const allocator = options.allocator orelse unreachable;
- switch (ptrInfo.size) {
- .One => {
- parseFree(ptrInfo.child, value.*, options);
- allocator.destroy(value);
- },
- .Slice => {
- for (value) |v| {
- parseFree(ptrInfo.child, v, options);
- }
- allocator.free(value);
- },
- else => unreachable,
- }
- },
- else => unreachable,
- }
-}
-
-/// A non-stream JSON parser which constructs a tree of Value's.
-pub const Parser = struct {
- allocator: Allocator,
- state: State,
- copy_strings: bool,
- // Stores parent nodes and un-combined Values.
- stack: Array,
-
- const State = enum {
- ObjectKey,
- ObjectValue,
- ArrayValue,
- Simple,
- };
-
- pub fn init(allocator: Allocator, copy_strings: bool) Parser {
- return Parser{
- .allocator = allocator,
- .state = .Simple,
- .copy_strings = copy_strings,
- .stack = Array.init(allocator),
- };
- }
-
- pub fn deinit(p: *Parser) void {
- p.stack.deinit();
- }
-
- pub fn reset(p: *Parser) void {
- p.state = .Simple;
- p.stack.shrinkRetainingCapacity(0);
- }
-
- pub fn parse(p: *Parser, input: []const u8) !ValueTree {
- var s = TokenStream.init(input);
-
- var arena = try p.allocator.create(ArenaAllocator);
- errdefer p.allocator.destroy(arena);
-
- arena.* = ArenaAllocator.init(p.allocator);
- errdefer arena.deinit();
-
- const allocator = arena.allocator();
-
- while (try s.next()) |token| {
- try p.transition(allocator, input, s.i - 1, token);
- }
-
- debug.assert(p.stack.items.len == 1);
-
- return ValueTree{
- .arena = arena,
- .root = p.stack.items[0],
- };
- }
-
- // Even though p.allocator exists, we take an explicit allocator so that allocation state
- // can be cleaned up on error correctly during a `parse` on call.
- fn transition(p: *Parser, allocator: Allocator, input: []const u8, i: usize, token: Token) !void {
- switch (p.state) {
- .ObjectKey => switch (token) {
- .ObjectEnd => {
- if (p.stack.items.len == 1) {
- return;
- }
-
- var value = p.stack.pop();
- try p.pushToParent(&value);
- },
- .String => |s| {
- try p.stack.append(try p.parseString(allocator, s, input, i));
- p.state = .ObjectValue;
- },
- else => {
- // The streaming parser would return an error eventually.
- // To prevent invalid state we return an error now.
- // TODO make the streaming parser return an error as soon as it encounters an invalid object key
- return error.InvalidLiteral;
- },
- },
- .ObjectValue => {
- var object = &p.stack.items[p.stack.items.len - 2].Object;
- var key = p.stack.items[p.stack.items.len - 1].String;
-
- switch (token) {
- .ObjectBegin => {
- try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
- p.state = .ObjectKey;
- },
- .ArrayBegin => {
- try p.stack.append(Value{ .Array = Array.init(allocator) });
- p.state = .ArrayValue;
- },
- .String => |s| {
- try object.put(key, try p.parseString(allocator, s, input, i));
- _ = p.stack.pop();
- p.state = .ObjectKey;
- },
- .Number => |n| {
- try object.put(key, try p.parseNumber(n, input, i));
- _ = p.stack.pop();
- p.state = .ObjectKey;
- },
- .True => {
- try object.put(key, Value{ .Bool = true });
- _ = p.stack.pop();
- p.state = .ObjectKey;
- },
- .False => {
- try object.put(key, Value{ .Bool = false });
- _ = p.stack.pop();
- p.state = .ObjectKey;
- },
- .Null => {
- try object.put(key, Value.Null);
- _ = p.stack.pop();
- p.state = .ObjectKey;
- },
- .ObjectEnd, .ArrayEnd => {
- unreachable;
- },
- }
- },
- .ArrayValue => {
- var array = &p.stack.items[p.stack.items.len - 1].Array;
-
- switch (token) {
- .ArrayEnd => {
- if (p.stack.items.len == 1) {
- return;
- }
-
- var value = p.stack.pop();
- try p.pushToParent(&value);
- },
- .ObjectBegin => {
- try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
- p.state = .ObjectKey;
- },
- .ArrayBegin => {
- try p.stack.append(Value{ .Array = Array.init(allocator) });
- p.state = .ArrayValue;
- },
- .String => |s| {
- try array.append(try p.parseString(allocator, s, input, i));
- },
- .Number => |n| {
- try array.append(try p.parseNumber(n, input, i));
- },
- .True => {
- try array.append(Value{ .Bool = true });
- },
- .False => {
- try array.append(Value{ .Bool = false });
- },
- .Null => {
- try array.append(Value.Null);
- },
- .ObjectEnd => {
- unreachable;
- },
- }
- },
- .Simple => switch (token) {
- .ObjectBegin => {
- try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
- p.state = .ObjectKey;
- },
- .ArrayBegin => {
- try p.stack.append(Value{ .Array = Array.init(allocator) });
- p.state = .ArrayValue;
- },
- .String => |s| {
- try p.stack.append(try p.parseString(allocator, s, input, i));
- },
- .Number => |n| {
- try p.stack.append(try p.parseNumber(n, input, i));
- },
- .True => {
- try p.stack.append(Value{ .Bool = true });
- },
- .False => {
- try p.stack.append(Value{ .Bool = false });
- },
- .Null => {
- try p.stack.append(Value.Null);
- },
- .ObjectEnd, .ArrayEnd => {
- unreachable;
- },
- },
- }
- }
-
- fn pushToParent(p: *Parser, value: *const Value) !void {
- switch (p.stack.items[p.stack.items.len - 1]) {
- // Object Parent -> [ ..., object, <key>, value ]
- Value.String => |key| {
- _ = p.stack.pop();
-
- var object = &p.stack.items[p.stack.items.len - 1].Object;
- try object.put(key, value.*);
- p.state = .ObjectKey;
- },
- // Array Parent -> [ ..., <array>, value ]
- Value.Array => |*array| {
- try array.append(value.*);
- p.state = .ArrayValue;
- },
- else => {
- unreachable;
- },
- }
- }
-
- fn parseString(p: *Parser, allocator: Allocator, s: std.meta.TagPayload(Token, Token.String), input: []const u8, i: usize) !Value {
- const slice = s.slice(input, i);
- switch (s.escapes) {
- .None => return Value{ .String = if (p.copy_strings) try allocator.dupe(u8, slice) else slice },
- .Some => {
- const output = try allocator.alloc(u8, s.decodedLength());
- errdefer allocator.free(output);
- try unescapeValidString(output, slice);
- return Value{ .String = output };
- },
- }
- }
-
- fn parseNumber(p: *Parser, n: std.meta.TagPayload(Token, Token.Number), input: []const u8, i: usize) !Value {
- _ = p;
- return if (n.is_integer)
- Value{
- .Integer = std.fmt.parseInt(i64, n.slice(input, i), 10) catch |e| switch (e) {
- error.Overflow => return Value{ .NumberString = n.slice(input, i) },
- error.InvalidCharacter => |err| return err,
- },
- }
- else
- Value{ .Float = try std.fmt.parseFloat(f64, n.slice(input, i)) };
- }
-};
-
-pub const UnescapeValidStringError = error{InvalidUnicodeHexSymbol};
-
-/// Unescape a JSON string
-/// Only to be used on strings already validated by the parser
-/// (note the unreachable statements and lack of bounds checking)
-pub fn unescapeValidString(output: []u8, input: []const u8) UnescapeValidStringError!void {
- var inIndex: usize = 0;
- var outIndex: usize = 0;
-
- while (inIndex < input.len) {
- if (input[inIndex] != '\\') {
- // not an escape sequence
- output[outIndex] = input[inIndex];
- inIndex += 1;
- outIndex += 1;
- } else if (input[inIndex + 1] != 'u') {
- // a simple escape sequence
- output[outIndex] = @as(u8, switch (input[inIndex + 1]) {
- '\\' => '\\',
- '/' => '/',
- 'n' => '\n',
- 'r' => '\r',
- 't' => '\t',
- 'f' => 12,
- 'b' => 8,
- '"' => '"',
- else => unreachable,
- });
- inIndex += 2;
- outIndex += 1;
- } else {
- // a unicode escape sequence
- const firstCodeUnit = std.fmt.parseInt(u16, input[inIndex + 2 .. inIndex + 6], 16) catch unreachable;
-
- // guess optimistically that it's not a surrogate pair
- if (std.unicode.utf8Encode(firstCodeUnit, output[outIndex..])) |byteCount| {
- outIndex += byteCount;
- inIndex += 6;
- } else |err| {
- // it might be a surrogate pair
- if (err != error.Utf8CannotEncodeSurrogateHalf) {
- return error.InvalidUnicodeHexSymbol;
- }
- // check if a second code unit is present
- if (inIndex + 7 >= input.len or input[inIndex + 6] != '\\' or input[inIndex + 7] != 'u') {
- return error.InvalidUnicodeHexSymbol;
- }
-
- const secondCodeUnit = std.fmt.parseInt(u16, input[inIndex + 8 .. inIndex + 12], 16) catch unreachable;
-
- const utf16le_seq = [2]u16{
- mem.nativeToLittle(u16, firstCodeUnit),
- mem.nativeToLittle(u16, secondCodeUnit),
- };
- if (std.unicode.utf16leToUtf8(output[outIndex..], &utf16le_seq)) |byteCount| {
- outIndex += byteCount;
- inIndex += 12;
- } else |_| {
- return error.InvalidUnicodeHexSymbol;
- }
- }
- }
- }
- assert(outIndex == output.len);
-}
-
-pub const StringifyOptions = struct {
- pub const Whitespace = struct {
- /// How many indentation levels deep are we?
- indent_level: usize = 0,
-
- /// What character(s) should be used for indentation?
- indent: union(enum) {
- Space: u8,
- Tab: void,
- None: void,
- } = .{ .Space = 4 },
-
- /// After a colon, should whitespace be inserted?
- separator: bool = true,
-
- pub fn outputIndent(
- whitespace: @This(),
- out_stream: anytype,
- ) @TypeOf(out_stream).Error!void {
- var char: u8 = undefined;
- var n_chars: usize = undefined;
- switch (whitespace.indent) {
- .Space => |n_spaces| {
- char = ' ';
- n_chars = n_spaces;
- },
- .Tab => {
- char = '\t';
- n_chars = 1;
- },
- .None => return,
- }
- try out_stream.writeByte('\n');
- n_chars *= whitespace.indent_level;
- try out_stream.writeByteNTimes(char, n_chars);
- }
- };
-
- /// Controls the whitespace emitted
- whitespace: ?Whitespace = null,
-
- /// Should optional fields with null value be written?
- emit_null_optional_fields: bool = true,
-
- string: StringOptions = StringOptions{ .String = .{} },
-
- /// Should []u8 be serialised as a string? or an array?
- pub const StringOptions = union(enum) {
- Array,
- String: StringOutputOptions,
-
- /// String output options
- const StringOutputOptions = struct {
- /// Should '/' be escaped in strings?
- escape_solidus: bool = false,
-
- /// Should unicode characters be escaped in strings?
- escape_unicode: bool = false,
- };
- };
-};
-
-fn outputUnicodeEscape(
- codepoint: u21,
- out_stream: anytype,
-) !void {
- if (codepoint <= 0xFFFF) {
- // If the character is in the Basic Multilingual Plane (U+0000 through U+FFFF),
- // then it may be represented as a six-character sequence: a reverse solidus, followed
- // by the lowercase letter u, followed by four hexadecimal digits that encode the character's code point.
- try out_stream.writeAll("\\u");
- try std.fmt.formatIntValue(codepoint, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
- } else {
- assert(codepoint <= 0x10FFFF);
- // To escape an extended character that is not in the Basic Multilingual Plane,
- // the character is represented as a 12-character sequence, encoding the UTF-16 surrogate pair.
- const high = @intCast(u16, (codepoint - 0x10000) >> 10) + 0xD800;
- const low = @intCast(u16, codepoint & 0x3FF) + 0xDC00;
- try out_stream.writeAll("\\u");
- try std.fmt.formatIntValue(high, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
- try out_stream.writeAll("\\u");
- try std.fmt.formatIntValue(low, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
- }
-}
-
-/// Write `string` to `writer` as a JSON encoded string.
-pub fn encodeJsonString(string: []const u8, options: StringifyOptions, writer: anytype) !void {
- try writer.writeByte('\"');
- try encodeJsonStringChars(string, options, writer);
- try writer.writeByte('\"');
-}
-
-/// Write `chars` to `writer` as JSON encoded string characters.
-pub fn encodeJsonStringChars(chars: []const u8, options: StringifyOptions, writer: anytype) !void {
- var i: usize = 0;
- while (i < chars.len) : (i += 1) {
- switch (chars[i]) {
- // normal ascii character
- 0x20...0x21, 0x23...0x2E, 0x30...0x5B, 0x5D...0x7F => |c| try writer.writeByte(c),
- // only 2 characters that *must* be escaped
- '\\' => try writer.writeAll("\\\\"),
- '\"' => try writer.writeAll("\\\""),
- // solidus is optional to escape
- '/' => {
- if (options.string.String.escape_solidus) {
- try writer.writeAll("\\/");
- } else {
- try writer.writeByte('/');
- }
- },
- // control characters with short escapes
- // TODO: option to switch between unicode and 'short' forms?
- 0x8 => try writer.writeAll("\\b"),
- 0xC => try writer.writeAll("\\f"),
- '\n' => try writer.writeAll("\\n"),
- '\r' => try writer.writeAll("\\r"),
- '\t' => try writer.writeAll("\\t"),
- else => {
- const ulen = std.unicode.utf8ByteSequenceLength(chars[i]) catch unreachable;
- // control characters (only things left with 1 byte length) should always be printed as unicode escapes
- if (ulen == 1 or options.string.String.escape_unicode) {
- const codepoint = std.unicode.utf8Decode(chars[i..][0..ulen]) catch unreachable;
- try outputUnicodeEscape(codepoint, writer);
- } else {
- try writer.writeAll(chars[i..][0..ulen]);
- }
- i += ulen - 1;
- },
- }
- }
-}
-
-pub fn stringify(
- value: anytype,
- options: StringifyOptions,
- out_stream: anytype,
-) !void {
- const T = @TypeOf(value);
- switch (@typeInfo(T)) {
- .Float, .ComptimeFloat => {
- return std.fmt.formatFloatScientific(value, std.fmt.FormatOptions{}, out_stream);
- },
- .Int, .ComptimeInt => {
- return std.fmt.formatIntValue(value, "", std.fmt.FormatOptions{}, out_stream);
- },
- .Bool => {
- return out_stream.writeAll(if (value) "true" else "false");
- },
- .Null => {
- return out_stream.writeAll("null");
- },
- .Optional => {
- if (value) |payload| {
- return try stringify(payload, options, out_stream);
- } else {
- return try stringify(null, options, out_stream);
- }
- },
- .Enum => {
- if (comptime std.meta.trait.hasFn("jsonStringify")(T)) {
- return value.jsonStringify(options, out_stream);
- }
-
- @compileError("Unable to stringify enum '" ++ @typeName(T) ++ "'");
- },
- .Union => {
- if (comptime std.meta.trait.hasFn("jsonStringify")(T)) {
- return value.jsonStringify(options, out_stream);
- }
-
- const info = @typeInfo(T).Union;
- if (info.tag_type) |UnionTagType| {
- inline for (info.fields) |u_field| {
- if (value == @field(UnionTagType, u_field.name)) {
- return try stringify(@field(value, u_field.name), options, out_stream);
- }
- }
- } else {
- @compileError("Unable to stringify untagged union '" ++ @typeName(T) ++ "'");
- }
- },
- .Struct => |S| {
- if (comptime std.meta.trait.hasFn("jsonStringify")(T)) {
- return value.jsonStringify(options, out_stream);
- }
-
- try out_stream.writeByte(if (S.is_tuple) '[' else '{');
- var field_output = false;
- var child_options = options;
- if (child_options.whitespace) |*child_whitespace| {
- child_whitespace.indent_level += 1;
- }
- inline for (S.fields) |Field| {
- // don't include void fields
- if (Field.type == void) continue;
-
- var emit_field = true;
-
- // don't include optional fields that are null when emit_null_optional_fields is set to false
- if (@typeInfo(Field.type) == .Optional) {
- if (options.emit_null_optional_fields == false) {
- if (@field(value, Field.name) == null) {
- emit_field = false;
- }
- }
- }
-
- if (emit_field) {
- if (!field_output) {
- field_output = true;
- } else {
- try out_stream.writeByte(',');
- }
- if (child_options.whitespace) |child_whitespace| {
- try child_whitespace.outputIndent(out_stream);
- }
- if (!S.is_tuple) {
- try encodeJsonString(Field.name, options, out_stream);
- try out_stream.writeByte(':');
- if (child_options.whitespace) |child_whitespace| {
- if (child_whitespace.separator) {
- try out_stream.writeByte(' ');
- }
- }
- }
- try stringify(@field(value, Field.name), child_options, out_stream);
- }
- }
- if (field_output) {
- if (options.whitespace) |whitespace| {
- try whitespace.outputIndent(out_stream);
- }
- }
- try out_stream.writeByte(if (S.is_tuple) ']' else '}');
- return;
- },
- .ErrorSet => return stringify(@as([]const u8, @errorName(value)), options, out_stream),
- .Pointer => |ptr_info| switch (ptr_info.size) {
- .One => switch (@typeInfo(ptr_info.child)) {
- .Array => {
- const Slice = []const std.meta.Elem(ptr_info.child);
- return stringify(@as(Slice, value), options, out_stream);
- },
- else => {
- // TODO: avoid loops?
- return stringify(value.*, options, out_stream);
- },
- },
- .Many, .Slice => {
- if (ptr_info.size == .Many and ptr_info.sentinel == null)
- @compileError("unable to stringify type '" ++ @typeName(T) ++ "' without sentinel");
- const slice = if (ptr_info.size == .Many) mem.span(value) else value;
-
- if (ptr_info.child == u8 and options.string == .String and std.unicode.utf8ValidateSlice(slice)) {
- try encodeJsonString(slice, options, out_stream);
- return;
- }
-
- try out_stream.writeByte('[');
- var child_options = options;
- if (child_options.whitespace) |*whitespace| {
- whitespace.indent_level += 1;
- }
- for (slice, 0..) |x, i| {
- if (i != 0) {
- try out_stream.writeByte(',');
- }
- if (child_options.whitespace) |child_whitespace| {
- try child_whitespace.outputIndent(out_stream);
- }
- try stringify(x, child_options, out_stream);
- }
- if (slice.len != 0) {
- if (options.whitespace) |whitespace| {
- try whitespace.outputIndent(out_stream);
- }
- }
- try out_stream.writeByte(']');
- return;
- },
- else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"),
- },
- .Array => return stringify(&value, options, out_stream),
- .Vector => |info| {
- const array: [info.len]info.child = value;
- return stringify(&array, options, out_stream);
- },
- else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"),
- }
- unreachable;
-}
-
-// Same as `stringify` but accepts an Allocator and stores result in dynamically allocated memory instead of using a Writer.
-// Caller owns returned memory.
-pub fn stringifyAlloc(allocator: std.mem.Allocator, value: anytype, options: StringifyOptions) ![]const u8 {
- var list = std.ArrayList(u8).init(allocator);
- errdefer list.deinit();
- try stringify(value, options, list.writer());
- return list.toOwnedSlice();
-}
+// Deprecations
+pub const parse = @compileError("Deprecated; use parseFromSlice() or parseFromTokenSource() instead.");
+pub const StreamingParser = @compileError("Deprecated; use json.Scanner or json.Reader instead.");
+pub const TokenStream = @compileError("Deprecated; use json.Scanner or json.Reader instead.");
test {
_ = @import("json/test.zig");
+ _ = @import("json/scanner.zig");
_ = @import("json/write_stream.zig");
-}
-
-test "stringify null optional fields" {
- const MyStruct = struct {
- optional: ?[]const u8 = null,
- required: []const u8 = "something",
- another_optional: ?[]const u8 = null,
- another_required: []const u8 = "something else",
- };
- try teststringify(
- \\{"optional":null,"required":"something","another_optional":null,"another_required":"something else"}
- ,
- MyStruct{},
- StringifyOptions{},
- );
- try teststringify(
- \\{"required":"something","another_required":"something else"}
- ,
- MyStruct{},
- StringifyOptions{ .emit_null_optional_fields = false },
- );
-
- var ts = TokenStream.init(
- \\{"required":"something","another_required":"something else"}
- );
- try std.testing.expect(try parsesTo(MyStruct, MyStruct{}, &ts, .{
- .allocator = std.testing.allocator,
- }));
-}
-
-test "skipValue" {
- var ts = TokenStream.init("false");
- try skipValue(&ts);
- ts = TokenStream.init("true");
- try skipValue(&ts);
- ts = TokenStream.init("null");
- try skipValue(&ts);
- ts = TokenStream.init("42");
- try skipValue(&ts);
- ts = TokenStream.init("42.0");
- try skipValue(&ts);
- ts = TokenStream.init("\"foo\"");
- try skipValue(&ts);
- ts = TokenStream.init("[101, 111, 121]");
- try skipValue(&ts);
- ts = TokenStream.init("{}");
- try skipValue(&ts);
- ts = TokenStream.init("{\"foo\": \"bar\"}");
- try skipValue(&ts);
-
- { // An absurd number of nestings
- const nestings = StreamingParser.default_max_nestings + 1;
-
- ts = TokenStream.init("[" ** nestings ++ "]" ** nestings);
- try testing.expectError(error.TooManyNestedItems, skipValue(&ts));
- }
-
- { // Would a number token cause problems in a deeply-nested array?
- const nestings = StreamingParser.default_max_nestings;
- const deeply_nested_array = "[" ** nestings ++ "0.118, 999, 881.99, 911.9, 725, 3" ++ "]" ** nestings;
-
- ts = TokenStream.init(deeply_nested_array);
- try skipValue(&ts);
-
- ts = TokenStream.init("[" ++ deeply_nested_array ++ "]");
- try testing.expectError(error.TooManyNestedItems, skipValue(&ts));
- }
-
- // Mismatched brace/square bracket
- ts = TokenStream.init("[102, 111, 111}");
- try testing.expectError(error.UnexpectedClosingBrace, skipValue(&ts));
-
- { // should fail if no value found (e.g. immediate close of object)
- var empty_object = TokenStream.init("{}");
- assert(.ObjectBegin == (try empty_object.next()).?);
- try testing.expectError(error.UnexpectedJsonDepth, skipValue(&empty_object));
-
- var empty_array = TokenStream.init("[]");
- assert(.ArrayBegin == (try empty_array.next()).?);
- try testing.expectError(error.UnexpectedJsonDepth, skipValue(&empty_array));
- }
-}
-
-test "stringify basic types" {
- try teststringify("false", false, StringifyOptions{});
- try teststringify("true", true, StringifyOptions{});
- try teststringify("null", @as(?u8, null), StringifyOptions{});
- try teststringify("null", @as(?*u32, null), StringifyOptions{});
- try teststringify("42", 42, StringifyOptions{});
- try teststringify("4.2e+01", 42.0, StringifyOptions{});
- try teststringify("42", @as(u8, 42), StringifyOptions{});
- try teststringify("42", @as(u128, 42), StringifyOptions{});
- try teststringify("4.2e+01", @as(f32, 42), StringifyOptions{});
- try teststringify("4.2e+01", @as(f64, 42), StringifyOptions{});
- try teststringify("\"ItBroke\"", @as(anyerror, error.ItBroke), StringifyOptions{});
-}
-
-test "stringify string" {
- try teststringify("\"hello\"", "hello", StringifyOptions{});
- try teststringify("\"with\\nescapes\\r\"", "with\nescapes\r", StringifyOptions{});
- try teststringify("\"with\\nescapes\\r\"", "with\nescapes\r", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
- try teststringify("\"with unicode\\u0001\"", "with unicode\u{1}", StringifyOptions{});
- try teststringify("\"with unicode\\u0001\"", "with unicode\u{1}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
- try teststringify("\"with unicode\u{80}\"", "with unicode\u{80}", StringifyOptions{});
- try teststringify("\"with unicode\\u0080\"", "with unicode\u{80}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
- try teststringify("\"with unicode\u{FF}\"", "with unicode\u{FF}", StringifyOptions{});
- try teststringify("\"with unicode\\u00ff\"", "with unicode\u{FF}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
- try teststringify("\"with unicode\u{100}\"", "with unicode\u{100}", StringifyOptions{});
- try teststringify("\"with unicode\\u0100\"", "with unicode\u{100}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
- try teststringify("\"with unicode\u{800}\"", "with unicode\u{800}", StringifyOptions{});
- try teststringify("\"with unicode\\u0800\"", "with unicode\u{800}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
- try teststringify("\"with unicode\u{8000}\"", "with unicode\u{8000}", StringifyOptions{});
- try teststringify("\"with unicode\\u8000\"", "with unicode\u{8000}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
- try teststringify("\"with unicode\u{D799}\"", "with unicode\u{D799}", StringifyOptions{});
- try teststringify("\"with unicode\\ud799\"", "with unicode\u{D799}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
- try teststringify("\"with unicode\u{10000}\"", "with unicode\u{10000}", StringifyOptions{});
- try teststringify("\"with unicode\\ud800\\udc00\"", "with unicode\u{10000}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
- try teststringify("\"with unicode\u{10FFFF}\"", "with unicode\u{10FFFF}", StringifyOptions{});
- try teststringify("\"with unicode\\udbff\\udfff\"", "with unicode\u{10FFFF}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
- try teststringify("\"/\"", "/", StringifyOptions{});
- try teststringify("\"\\/\"", "/", StringifyOptions{ .string = .{ .String = .{ .escape_solidus = true } } });
-}
-
-test "stringify many-item sentinel-terminated string" {
- try teststringify("\"hello\"", @as([*:0]const u8, "hello"), StringifyOptions{});
- try teststringify("\"with\\nescapes\\r\"", @as([*:0]const u8, "with\nescapes\r"), StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
- try teststringify("\"with unicode\\u0001\"", @as([*:0]const u8, "with unicode\u{1}"), StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
-}
-
-test "stringify tagged unions" {
- try teststringify("42", union(enum) {
- Foo: u32,
- Bar: bool,
- }{ .Foo = 42 }, StringifyOptions{});
-}
-
-test "stringify struct" {
- try teststringify("{\"foo\":42}", struct {
- foo: u32,
- }{ .foo = 42 }, StringifyOptions{});
-}
-
-test "stringify struct with string as array" {
- try teststringify("{\"foo\":\"bar\"}", .{ .foo = "bar" }, StringifyOptions{});
- try teststringify("{\"foo\":[98,97,114]}", .{ .foo = "bar" }, StringifyOptions{ .string = .Array });
-}
-
-test "stringify struct with indentation" {
- try teststringify(
- \\{
- \\ "foo": 42,
- \\ "bar": [
- \\ 1,
- \\ 2,
- \\ 3
- \\ ]
- \\}
- ,
- struct {
- foo: u32,
- bar: [3]u32,
- }{
- .foo = 42,
- .bar = .{ 1, 2, 3 },
- },
- StringifyOptions{
- .whitespace = .{},
- },
- );
- try teststringify(
- "{\n\t\"foo\":42,\n\t\"bar\":[\n\t\t1,\n\t\t2,\n\t\t3\n\t]\n}",
- struct {
- foo: u32,
- bar: [3]u32,
- }{
- .foo = 42,
- .bar = .{ 1, 2, 3 },
- },
- StringifyOptions{
- .whitespace = .{
- .indent = .Tab,
- .separator = false,
- },
- },
- );
- try teststringify(
- \\{"foo":42,"bar":[1,2,3]}
- ,
- struct {
- foo: u32,
- bar: [3]u32,
- }{
- .foo = 42,
- .bar = .{ 1, 2, 3 },
- },
- StringifyOptions{
- .whitespace = .{
- .indent = .None,
- .separator = false,
- },
- },
- );
-}
-
-test "stringify struct with void field" {
- try teststringify("{\"foo\":42}", struct {
- foo: u32,
- bar: void = {},
- }{ .foo = 42 }, StringifyOptions{});
-}
-
-test "stringify array of structs" {
- const MyStruct = struct {
- foo: u32,
- };
- try teststringify("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", [_]MyStruct{
- MyStruct{ .foo = 42 },
- MyStruct{ .foo = 100 },
- MyStruct{ .foo = 1000 },
- }, StringifyOptions{});
-}
-
-test "stringify struct with custom stringifier" {
- try teststringify("[\"something special\",42]", struct {
- foo: u32,
- const Self = @This();
- pub fn jsonStringify(
- value: Self,
- options: StringifyOptions,
- out_stream: anytype,
- ) !void {
- _ = value;
- try out_stream.writeAll("[\"something special\",");
- try stringify(42, options, out_stream);
- try out_stream.writeByte(']');
- }
- }{ .foo = 42 }, StringifyOptions{});
-}
-
-test "stringify vector" {
- try teststringify("[1,1]", @splat(2, @as(u32, 1)), StringifyOptions{});
-}
-
-test "stringify tuple" {
- try teststringify("[\"foo\",42]", std.meta.Tuple(&.{ []const u8, usize }){ "foo", 42 }, StringifyOptions{});
-}
-
-fn teststringify(expected: []const u8, value: anytype, options: StringifyOptions) !void {
- const ValidationWriter = struct {
- const Self = @This();
- pub const Writer = std.io.Writer(*Self, Error, write);
- pub const Error = error{
- TooMuchData,
- DifferentData,
- };
-
- expected_remaining: []const u8,
-
- fn init(exp: []const u8) Self {
- return .{ .expected_remaining = exp };
- }
-
- pub fn writer(self: *Self) Writer {
- return .{ .context = self };
- }
-
- fn write(self: *Self, bytes: []const u8) Error!usize {
- if (self.expected_remaining.len < bytes.len) {
- std.debug.print(
- \\====== expected this output: =========
- \\{s}
- \\======== instead found this: =========
- \\{s}
- \\======================================
- , .{
- self.expected_remaining,
- bytes,
- });
- return error.TooMuchData;
- }
- if (!mem.eql(u8, self.expected_remaining[0..bytes.len], bytes)) {
- std.debug.print(
- \\====== expected this output: =========
- \\{s}
- \\======== instead found this: =========
- \\{s}
- \\======================================
- , .{
- self.expected_remaining[0..bytes.len],
- bytes,
- });
- return error.DifferentData;
- }
- self.expected_remaining = self.expected_remaining[bytes.len..];
- return bytes.len;
- }
- };
-
- var vos = ValidationWriter.init(expected);
- try stringify(value, options, vos.writer());
- if (vos.expected_remaining.len > 0) return error.NotEnoughData;
-}
-
-test "encodesTo" {
- // same
- try testing.expectEqual(true, encodesTo("false", "false"));
- // totally different
- try testing.expectEqual(false, encodesTo("false", "true"));
- // different lengths
- try testing.expectEqual(false, encodesTo("false", "other"));
- // with escape
- try testing.expectEqual(true, encodesTo("\\", "\\\\"));
- try testing.expectEqual(true, encodesTo("with\nescape", "with\\nescape"));
- // with unicode
- try testing.expectEqual(true, encodesTo("ą", "\\u0105"));
- try testing.expectEqual(true, encodesTo("😂", "\\ud83d\\ude02"));
- try testing.expectEqual(true, encodesTo("withąunicode😂", "with\\u0105unicode\\ud83d\\ude02"));
-}
-
-test "deserializing string with escape sequence into sentinel slice" {
- const json = "\"\\n\"";
- var token_stream = std.json.TokenStream.init(json);
- const options = ParseOptions{ .allocator = std.testing.allocator };
-
- // Pre-fix, this line would panic:
- const result = try std.json.parse([:0]const u8, &token_stream, options);
- defer std.json.parseFree([:0]const u8, result, options);
-
- // Double-check that we're getting the right result
- try testing.expect(mem.eql(u8, result, "\n"));
-}
-
-test "stringify struct with custom stringify that returns a custom error" {
- var ret = std.json.stringify(struct {
- field: Field = .{},
-
- pub const Field = struct {
- field: ?[]*Field = null,
-
- const Self = @This();
- pub fn jsonStringify(_: Self, _: StringifyOptions, _: anytype) error{CustomError}!void {
- return error.CustomError;
- }
- };
- }{}, StringifyOptions{}, std.io.null_writer);
-
- try std.testing.expectError(error.CustomError, ret);
+ _ = @import("json/dynamic.zig");
+ _ = @import("json/static.zig");
+ _ = @import("json/stringify.zig");
+ _ = @import("json/JSONTestSuite_test.zig");
}
src/Autodoc.zig
@@ -295,7 +295,7 @@ pub fn generateZirData(self: *Autodoc) !void {
try std.json.stringify(
data,
.{
- .whitespace = .{ .indent = .None, .separator = false },
+ .whitespace = .{ .indent = .none, .separator = false },
.emit_null_optional_fields = true,
},
out,
@@ -444,7 +444,7 @@ const DocData = struct {
w: anytype,
) !void {
var jsw = std.json.writeStream(w, 15);
- if (opts.whitespace) |ws| jsw.whitespace = ws;
+ jsw.whitespace = opts.whitespace;
try jsw.beginObject();
inline for (comptime std.meta.tags(std.meta.FieldEnum(DocData))) |f| {
const f_name = @tagName(f);
@@ -495,7 +495,7 @@ const DocData = struct {
w: anytype,
) !void {
var jsw = std.json.writeStream(w, 15);
- if (opts.whitespace) |ws| jsw.whitespace = ws;
+ jsw.whitespace = opts.whitespace;
try jsw.beginObject();
inline for (comptime std.meta.tags(std.meta.FieldEnum(DocModule))) |f| {
@@ -529,7 +529,7 @@ const DocData = struct {
w: anytype,
) !void {
var jsw = std.json.writeStream(w, 15);
- if (opts.whitespace) |ws| jsw.whitespace = ws;
+ jsw.whitespace = opts.whitespace;
try jsw.beginArray();
inline for (comptime std.meta.fields(Decl)) |f| {
try jsw.arrayElem();
@@ -556,7 +556,7 @@ const DocData = struct {
w: anytype,
) !void {
var jsw = std.json.writeStream(w, 15);
- if (opts.whitespace) |ws| jsw.whitespace = ws;
+ jsw.whitespace = opts.whitespace;
try jsw.beginArray();
inline for (comptime std.meta.fields(AstNode)) |f| {
try jsw.arrayElem();
@@ -689,7 +689,7 @@ const DocData = struct {
) !void {
const active_tag = std.meta.activeTag(self);
var jsw = std.json.writeStream(w, 15);
- if (opts.whitespace) |ws| jsw.whitespace = ws;
+ jsw.whitespace = opts.whitespace;
try jsw.beginArray();
try jsw.arrayElem();
try jsw.emitNumber(@enumToInt(active_tag));
@@ -831,7 +831,7 @@ const DocData = struct {
) @TypeOf(w).Error!void {
const active_tag = std.meta.activeTag(self);
var jsw = std.json.writeStream(w, 15);
- if (opts.whitespace) |ws| jsw.whitespace = ws;
+ jsw.whitespace = opts.whitespace;
try jsw.beginObject();
if (active_tag == .declIndex) {
try jsw.objectField("declRef");
src/print_env.zig
@@ -28,7 +28,7 @@ pub fn cmdEnv(gpa: Allocator, args: []const []const u8, stdout: std.fs.File.Writ
var bw = std.io.bufferedWriter(stdout);
const w = bw.writer();
- var jws = std.json.WriteStream(@TypeOf(w), 6).init(w);
+ var jws = std.json.writeStream(w, 6);
try jws.beginObject();
try jws.objectField("zig_exe");
src/print_targets.zig
@@ -40,7 +40,7 @@ pub fn cmdTargets(
var bw = io.bufferedWriter(stdout);
const w = bw.writer();
- var jws = std.json.WriteStream(@TypeOf(w), 6).init(w);
+ var jws = std.json.writeStream(w, 6);
try jws.beginObject();
tools/gen_spirv_spec.zig
@@ -20,8 +20,7 @@ pub fn main() !void {
// Required for json parsing.
@setEvalBranchQuota(10000);
- var tokens = std.json.TokenStream.init(spec);
- var registry = try std.json.parse(g.Registry, &tokens, .{ .allocator = allocator });
+ var registry = try std.json.parseFromSlice(g.Registry, allocator, spec, .{});
const core_reg = switch (registry) {
.core => |core_reg| core_reg,
tools/generate_JSONTestSuite.zig
@@ -0,0 +1,79 @@
+// zig run this file inside the test_parsing/ directory of this repo: https://github.com/nst/JSONTestSuite
+
+const std = @import("std");
+
+pub fn main() !void {
+ var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+ var allocator = gpa.allocator();
+
+ var output = std.io.getStdOut().writer();
+ try output.writeAll(
+ \\// This file was generated by tools/generate_JSONTestSuite.zig
+ \\// These test cases are sourced from: https://github.com/nst/JSONTestSuite
+ \\const ok = @import("./test.zig").ok;
+ \\const err = @import("./test.zig").err;
+ \\const any = @import("./test.zig").any;
+ \\
+ \\
+ );
+
+ var names = std.ArrayList([]const u8).init(allocator);
+ var cwd = try std.fs.cwd().openIterableDir(".", .{});
+ var it = cwd.iterate();
+ while (try it.next()) |entry| {
+ try names.append(try allocator.dupe(u8, entry.name));
+ }
+ std.sort.sort([]const u8, names.items, {}, (struct {
+ fn lessThan(_: void, a: []const u8, b: []const u8) bool {
+ return std.mem.lessThan(u8, a, b);
+ }
+ }).lessThan);
+
+ for (names.items) |name| {
+ const contents = try std.fs.cwd().readFileAlloc(allocator, name, 250001);
+ try output.writeAll("test ");
+ try writeString(output, name);
+ try output.writeAll(" {\n try ");
+ switch (name[0]) {
+ 'y' => try output.writeAll("ok"),
+ 'n' => try output.writeAll("err"),
+ 'i' => try output.writeAll("any"),
+ else => unreachable,
+ }
+ try output.writeByte('(');
+ try writeString(output, contents);
+ try output.writeAll(");\n}\n");
+ }
+}
+
+const i_structure_500_nested_arrays = "[" ** 500 ++ "]" ** 500;
+const n_structure_100000_opening_arrays = "[" ** 100000;
+const n_structure_open_array_object = "[{\"\":" ** 50000 ++ "\n";
+
+fn writeString(writer: anytype, s: []const u8) !void {
+ if (s.len > 200) {
+ // There are a few of these we can compress with Zig expressions.
+ if (std.mem.eql(u8, s, i_structure_500_nested_arrays)) {
+ return writer.writeAll("\"[\" ** 500 ++ \"]\" ** 500");
+ } else if (std.mem.eql(u8, s, n_structure_100000_opening_arrays)) {
+ return writer.writeAll("\"[\" ** 100000");
+ } else if (std.mem.eql(u8, s, n_structure_open_array_object)) {
+ return writer.writeAll("\"[{\\\"\\\":\" ** 50000 ++ \"\\n\"");
+ }
+ unreachable;
+ }
+ try writer.writeByte('"');
+ for (s) |b| {
+ switch (b) {
+ 0...('\n' - 1),
+ ('\n' + 1)...0x1f,
+ 0x7f...0xff,
+ => try writer.print("\\x{x:0>2}", .{b}),
+ '\n' => try writer.writeAll("\\n"),
+ '"' => try writer.writeAll("\\\""),
+ '\\' => try writer.writeAll("\\\\"),
+ else => try writer.writeByte(b),
+ }
+ }
+ try writer.writeByte('"');
+}
tools/update_clang_options.zig
@@ -624,9 +624,9 @@ pub fn main() anyerror!void {
},
};
- var parser = json.Parser.init(allocator, false);
+ var parser = json.Parser.init(allocator, .alloc_if_needed);
const tree = try parser.parse(json_text);
- const root_map = &tree.root.Object;
+ const root_map = &tree.root.object;
var all_objects = std.ArrayList(*json.ObjectMap).init(allocator);
{
@@ -634,14 +634,14 @@ pub fn main() anyerror!void {
it_map: while (it.next()) |kv| {
if (kv.key_ptr.len == 0) continue;
if (kv.key_ptr.*[0] == '!') continue;
- if (kv.value_ptr.* != .Object) continue;
- if (!kv.value_ptr.Object.contains("NumArgs")) continue;
- if (!kv.value_ptr.Object.contains("Name")) continue;
+ if (kv.value_ptr.* != .object) continue;
+ if (!kv.value_ptr.object.contains("NumArgs")) continue;
+ if (!kv.value_ptr.object.contains("Name")) continue;
for (blacklisted_options) |blacklisted_key| {
if (std.mem.eql(u8, blacklisted_key, kv.key_ptr.*)) continue :it_map;
}
- if (kv.value_ptr.Object.get("Name").?.String.len == 0) continue;
- try all_objects.append(&kv.value_ptr.Object);
+ if (kv.value_ptr.object.get("Name").?.string.len == 0) continue;
+ try all_objects.append(&kv.value_ptr.object);
}
}
// Some options have multiple matches. As an example, "-Wl,foo" matches both
@@ -666,12 +666,12 @@ pub fn main() anyerror!void {
);
for (all_objects.items) |obj| {
- const name = obj.get("Name").?.String;
+ const name = obj.get("Name").?.string;
var pd1 = false;
var pd2 = false;
var pslash = false;
- for (obj.get("Prefixes").?.Array.items) |prefix_json| {
- const prefix = prefix_json.String;
+ for (obj.get("Prefixes").?.array.items) |prefix_json| {
+ const prefix = prefix_json.string;
if (std.mem.eql(u8, prefix, "-")) {
pd1 = true;
} else if (std.mem.eql(u8, prefix, "--")) {
@@ -790,9 +790,9 @@ const Syntax = union(enum) {
};
fn objSyntax(obj: *json.ObjectMap) ?Syntax {
- const num_args = @intCast(u8, obj.get("NumArgs").?.Integer);
- for (obj.get("!superclasses").?.Array.items) |superclass_json| {
- const superclass = superclass_json.String;
+ const num_args = @intCast(u8, obj.get("NumArgs").?.integer);
+ for (obj.get("!superclasses").?.array.items) |superclass_json| {
+ const superclass = superclass_json.string;
if (std.mem.eql(u8, superclass, "Joined")) {
return .joined;
} else if (std.mem.eql(u8, superclass, "CLJoined")) {
@@ -831,20 +831,20 @@ fn objSyntax(obj: *json.ObjectMap) ?Syntax {
return .{ .multi_arg = num_args };
}
}
- const name = obj.get("Name").?.String;
+ const name = obj.get("Name").?.string;
if (std.mem.eql(u8, name, "<input>")) {
return .flag;
} else if (std.mem.eql(u8, name, "<unknown>")) {
return .flag;
}
- const kind_def = obj.get("Kind").?.Object.get("def").?.String;
+ const kind_def = obj.get("Kind").?.object.get("def").?.string;
if (std.mem.eql(u8, kind_def, "KIND_FLAG")) {
return .flag;
}
- const key = obj.get("!name").?.String;
+ const key = obj.get("!name").?.string;
std.debug.print("{s} (key {s}) has unrecognized superclasses:\n", .{ name, key });
- for (obj.get("!superclasses").?.Array.items) |superclass_json| {
- std.debug.print(" {s}\n", .{superclass_json.String});
+ for (obj.get("!superclasses").?.array.items) |superclass_json| {
+ std.debug.print(" {s}\n", .{superclass_json.string});
}
//std.process.exit(1);
return null;
@@ -883,15 +883,15 @@ fn objectLessThan(context: void, a: *json.ObjectMap, b: *json.ObjectMap) bool {
}
if (!a_match_with_eql and !b_match_with_eql) {
- const a_name = a.get("Name").?.String;
- const b_name = b.get("Name").?.String;
+ const a_name = a.get("Name").?.string;
+ const b_name = b.get("Name").?.string;
if (a_name.len != b_name.len) {
return a_name.len > b_name.len;
}
}
- const a_key = a.get("!name").?.String;
- const b_key = b.get("!name").?.String;
+ const a_key = a.get("!name").?.string;
+ const b_key = b.get("!name").?.string;
return std.mem.lessThan(u8, a_key, b_key);
}
tools/update_cpu_features.zig
@@ -1054,14 +1054,14 @@ fn processOneTarget(job: Job) anyerror!void {
var json_parse_progress = progress_node.start("parse JSON", 0);
json_parse_progress.activate();
- var parser = json.Parser.init(arena, false);
+ var parser = json.Parser.init(arena, .alloc_if_needed);
const tree = try parser.parse(json_text);
json_parse_progress.end();
var render_progress = progress_node.start("render zig code", 0);
render_progress.activate();
- const root_map = &tree.root.Object;
+ const root_map = &tree.root.object;
var features_table = std.StringHashMap(Feature).init(arena);
var all_features = std.ArrayList(Feature).init(arena);
var all_cpus = std.ArrayList(Cpu).init(arena);
@@ -1070,21 +1070,21 @@ fn processOneTarget(job: Job) anyerror!void {
root_it: while (it.next()) |kv| {
if (kv.key_ptr.len == 0) continue;
if (kv.key_ptr.*[0] == '!') continue;
- if (kv.value_ptr.* != .Object) continue;
- if (hasSuperclass(&kv.value_ptr.Object, "SubtargetFeature")) {
- const llvm_name = kv.value_ptr.Object.get("Name").?.String;
+ if (kv.value_ptr.* != .object) continue;
+ if (hasSuperclass(&kv.value_ptr.object, "SubtargetFeature")) {
+ const llvm_name = kv.value_ptr.object.get("Name").?.string;
if (llvm_name.len == 0) continue;
var zig_name = try llvmNameToZigName(arena, llvm_name);
- var desc = kv.value_ptr.Object.get("Desc").?.String;
+ var desc = kv.value_ptr.object.get("Desc").?.string;
var deps = std.ArrayList([]const u8).init(arena);
var omit = false;
var flatten = false;
- const implies = kv.value_ptr.Object.get("Implies").?.Array;
+ const implies = kv.value_ptr.object.get("Implies").?.array;
for (implies.items) |imply| {
- const other_key = imply.Object.get("def").?.String;
- const other_obj = &root_map.getPtr(other_key).?.Object;
- const other_llvm_name = other_obj.get("Name").?.String;
+ const other_key = imply.object.get("def").?.string;
+ const other_obj = &root_map.getPtr(other_key).?.object;
+ const other_llvm_name = other_obj.get("Name").?.string;
const other_zig_name = (try llvmNameToZigNameOmit(
arena,
llvm_target,
@@ -1126,17 +1126,17 @@ fn processOneTarget(job: Job) anyerror!void {
try all_features.append(feature);
}
}
- if (hasSuperclass(&kv.value_ptr.Object, "Processor")) {
- const llvm_name = kv.value_ptr.Object.get("Name").?.String;
+ if (hasSuperclass(&kv.value_ptr.object, "Processor")) {
+ const llvm_name = kv.value_ptr.object.get("Name").?.string;
if (llvm_name.len == 0) continue;
var zig_name = try llvmNameToZigName(arena, llvm_name);
var deps = std.ArrayList([]const u8).init(arena);
- const features = kv.value_ptr.Object.get("Features").?.Array;
+ const features = kv.value_ptr.object.get("Features").?.array;
for (features.items) |feature| {
- const feature_key = feature.Object.get("def").?.String;
- const feature_obj = &root_map.getPtr(feature_key).?.Object;
- const feature_llvm_name = feature_obj.get("Name").?.String;
+ const feature_key = feature.object.get("def").?.string;
+ const feature_obj = &root_map.getPtr(feature_key).?.object;
+ const feature_llvm_name = feature_obj.get("Name").?.string;
if (feature_llvm_name.len == 0) continue;
const feature_zig_name = (try llvmNameToZigNameOmit(
arena,
@@ -1145,11 +1145,11 @@ fn processOneTarget(job: Job) anyerror!void {
)) orelse continue;
try deps.append(feature_zig_name);
}
- const tune_features = kv.value_ptr.Object.get("TuneFeatures").?.Array;
+ const tune_features = kv.value_ptr.object.get("TuneFeatures").?.array;
for (tune_features.items) |feature| {
- const feature_key = feature.Object.get("def").?.String;
- const feature_obj = &root_map.getPtr(feature_key).?.Object;
- const feature_llvm_name = feature_obj.get("Name").?.String;
+ const feature_key = feature.object.get("def").?.string;
+ const feature_obj = &root_map.getPtr(feature_key).?.object;
+ const feature_llvm_name = feature_obj.get("Name").?.string;
if (feature_llvm_name.len == 0) continue;
const feature_zig_name = (try llvmNameToZigNameOmit(
arena,
@@ -1431,8 +1431,8 @@ fn llvmNameToZigNameOmit(
fn hasSuperclass(obj: *json.ObjectMap, class_name: []const u8) bool {
const superclasses_json = obj.get("!superclasses") orelse return false;
- for (superclasses_json.Array.items) |superclass_json| {
- const superclass = superclass_json.String;
+ for (superclasses_json.array.items) |superclass_json| {
+ const superclass = superclass_json.string;
if (std.mem.eql(u8, superclass, class_name)) {
return true;
}
tools/update_spirv_features.zig
@@ -74,8 +74,7 @@ pub fn main() !void {
const registry_path = try fs.path.join(allocator, &.{ spirv_headers_root, "include", "spirv", "unified1", "spirv.core.grammar.json" });
const registry_json = try std.fs.cwd().readFileAlloc(allocator, registry_path, std.math.maxInt(usize));
- var tokens = std.json.TokenStream.init(registry_json);
- const registry = try std.json.parse(g.CoreRegistry, &tokens, .{ .allocator = allocator });
+ const registry = try std.json.parseFromSlice(g.CoreRegistry, allocator, registry_json, .{});
const capabilities = for (registry.operand_kinds) |opkind| {
if (std.mem.eql(u8, opkind.kind, "Capability"))