Commit d989396a34
Changed files (1)
lib
std
lib/std/json.zig
@@ -19,6 +19,74 @@ const StringEscapes = union(enum) {
},
};
/// Checks whether `decoded` is exactly the string that the JSON string
/// contents `encoded` unescape to, without allocating.
///
/// `decoded` is raw UTF-8; `encoded` is the text between the quotes of a JSON
/// string and may contain `\\`, `\/`, `\n`, `\r`, `\t`, `\f`, `\b`, `\"` and
/// `\uXXXX` escapes (including UTF-16 surrogate pairs).
///
/// Assumes that `encoded` is a well-formed json string: every backslash is
/// followed by a complete, valid escape. Malformed input hits `unreachable`.
///
/// Returns `false` on any mismatch, including when one side is longer than
/// the other.
fn encodesTo(decoded: []const u8, encoded: []const u8) bool {
    // `i` indexes `decoded`, `j` indexes `encoded`.
    var i: usize = 0;
    var j: usize = 0;
    while (i < decoded.len) {
        // `encoded` ran out first.
        if (j >= encoded.len) return false;
        if (encoded[j] != '\\') {
            // Plain byte: must match verbatim.
            if (decoded[i] != encoded[j]) return false;
            j += 1;
            i += 1;
        } else {
            const escape_type = encoded[j + 1];
            if (escape_type != 'u') {
                // Two-character escape standing for a single byte.
                const t: u8 = switch (escape_type) {
                    '\\' => '\\',
                    '/' => '/',
                    'n' => '\n',
                    'r' => '\r',
                    't' => '\t',
                    'f' => 12,
                    'b' => 8,
                    '"' => '"',
                    else => unreachable,
                };
                if (decoded[i] != t) return false;
                j += 2;
                i += 1;
            } else {
                // `\uXXXX`: four hex digits after the `\u`.
                var codepoint = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable;
                j += 6;
                if (codepoint >= 0xD800 and codepoint < 0xDC00) {
                    // surrogate pair: a second `\uXXXX` carries the low half
                    assert(encoded[j] == '\\');
                    assert(encoded[j + 1] == 'u');
                    const low_surrogate = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable;
                    codepoint = 0x10000 + (((codepoint & 0x03ff) << 10) | (low_surrogate & 0x03ff));
                    j += 6;
                }
                // Compare against the UTF-8 encoding of the code point.
                var buf: [4]u8 = undefined;
                const len = std.unicode.utf8Encode(codepoint, &buf) catch unreachable;
                if (i + len > decoded.len) return false;
                if (!mem.eql(u8, decoded[i .. i + len], buf[0..len])) return false;
                i += len;
            }
        }
    }
    // `decoded` is fully consumed here; it only matches if `encoded` is too.
    // Leftover encoded text means `decoded` was a strict prefix.
    return j == encoded.len;
}
+
test "encodesTo" {
    // same
    testing.expectEqual(true, encodesTo("false", "false"));
    // totally different
    testing.expectEqual(false, encodesTo("false", "true"));
    // same length, different content
    testing.expectEqual(false, encodesTo("false", "other"));
    // different lengths: encoded text runs out before the decoded string does
    testing.expectEqual(false, encodesTo("false", "fals"));
    // with escape
    testing.expectEqual(true, encodesTo("\\", "\\\\"));
    testing.expectEqual(true, encodesTo("with\nescape", "with\\nescape"));
    // with unicode
    testing.expectEqual(true, encodesTo("ą", "\\u0105"));
    // code point outside the BMP, encoded as a surrogate pair
    testing.expectEqual(true, encodesTo("😂", "\\ud83d\\ude02"));
    testing.expectEqual(true, encodesTo("withąunicode😂", "with\\u0105unicode\\ud83d\\ude02"));
}
+
/// A single token slice into the parent string.
///
/// Use `token.slice()` on the input at the current position to get the current slice.
@@ -1201,6 +1269,490 @@ pub const Value = union(enum) {
}
};
/// Settings accepted by `parse` and `parseFree`.
pub const ParseOptions = struct {
    /// Allocator used for pointer and slice targets.
    /// Parsing into a pointer type without one fails with
    /// `error.AllocatorRequired`.
    allocator: ?*Allocator = null,

    /// Behaviour when a duplicate field is encountered.
    /// The default, `.Error`, rejects the input with
    /// `error.DuplicateJSONField`.
    duplicate_field_behavior: enum {
        UseFirst,
        Error,
        UseLast,
    } = .Error,
};
+
/// Parses the JSON value that begins with `token` into a `T`, pulling any
/// further tokens it needs from `tokens`.
///
/// The target type drives the parse:
///   * `bool`, floats and integers come from the matching scalar token;
///   * optionals accept `null` or the child type;
///   * enums accept an integer tag value or a field name string;
///   * tagged unions take the first member, in declaration order, that parses;
///   * structs come from an object; unknown keys are `error.UnknownField` and
///     absent keys without a default are `error.MissingField`;
///   * arrays come from a JSON array, or from a string when the child is `u8`;
///   * single pointers and slices allocate with `options.allocator`.
///
/// Errors include `error.UnexpectedToken`, `error.UnexpectedEndOfJson`,
/// `error.InvalidEnumTag`, `error.NoUnionMembersMatched`,
/// `error.DuplicateJSONField`, `error.LengthMismatch`,
/// `error.AllocatorRequired`, plus whatever the tokenizer, number parsers and
/// allocator return.
///
/// On error, everything allocated while parsing this value has been released.
/// On success the caller releases the result with `parseFree`.
fn parseInternal(comptime T: type, token: Token, tokens: *TokenStream, options: ParseOptions) !T {
    switch (@typeInfo(T)) {
        .Bool => {
            return switch (token) {
                .True => true,
                .False => false,
                else => error.UnexpectedToken,
            };
        },
        .Float, .ComptimeFloat => {
            const numberToken = switch (token) {
                .Number => |n| n,
                else => return error.UnexpectedToken,
            };
            return try std.fmt.parseFloat(T, numberToken.slice(tokens.slice, tokens.i - 1));
        },
        .Int, .ComptimeInt => {
            const numberToken = switch (token) {
                .Number => |n| n,
                else => return error.UnexpectedToken,
            };
            if (!numberToken.is_integer) return error.UnexpectedToken;
            return try std.fmt.parseInt(T, numberToken.slice(tokens.slice, tokens.i - 1), 10);
        },
        .Optional => |optionalInfo| {
            if (token == .Null) {
                return null;
            } else {
                return try parseInternal(optionalInfo.child, token, tokens, options);
            }
        },
        .Enum => |enumInfo| {
            switch (token) {
                .Number => |numberToken| {
                    if (!numberToken.is_integer) return error.UnexpectedToken;
                    const n = try std.fmt.parseInt(enumInfo.tag_type, numberToken.slice(tokens.slice, tokens.i - 1), 10);
                    return try std.meta.intToEnum(T, n);
                },
                .String => |stringToken| {
                    const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
                    switch (stringToken.escapes) {
                        .None => return std.meta.stringToEnum(T, source_slice) orelse return error.InvalidEnumTag,
                        .Some => {
                            // Compare each field name against the escaped
                            // source text without allocating a decoded copy.
                            inline for (enumInfo.fields) |field| {
                                if (field.name.len == stringToken.decodedLength() and encodesTo(field.name, source_slice)) {
                                    return @field(T, field.name);
                                }
                            }
                            return error.InvalidEnumTag;
                        },
                    }
                },
                else => return error.UnexpectedToken,
            }
        },
        .Union => |unionInfo| {
            if (unionInfo.tag_type) |_| {
                // try each of the union fields until we find one that matches
                inline for (unionInfo.fields) |u_field| {
                    // Parse against a copy of the stream so that a member
                    // which fails part-way through does not leave `tokens`
                    // advanced for the next candidate.
                    var tokens_copy = tokens.*;
                    if (parseInternal(u_field.field_type, token, &tokens_copy, options)) |value| {
                        tokens.* = tokens_copy;
                        return @unionInit(T, u_field.name, value);
                    } else |err| {
                        // Bubble up error.OutOfMemory
                        // Parsing some types won't have OutOfMemory in their
                        // error-sets, for the condition to be valid, merge it in.
                        if (@as(@TypeOf(err) || error{OutOfMemory}, err) == error.OutOfMemory) return err;
                        // otherwise continue through the `inline for`
                    }
                }
                return error.NoUnionMembersMatched;
            } else {
                @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");
            }
        },
        .Struct => |structInfo| {
            switch (token) {
                .ObjectBegin => {},
                else => return error.UnexpectedToken,
            }
            var r: T = undefined;
            // fields_seen[i] is true exactly while field i of `r` holds a
            // parsed value that has to be released on failure.
            var fields_seen = [_]bool{false} ** structInfo.fields.len;
            errdefer {
                inline for (structInfo.fields) |field, i| {
                    if (fields_seen[i]) {
                        parseFree(field.field_type, @field(r, field.name), options);
                    }
                }
            }

            while (true) {
                switch ((try tokens.next()) orelse return error.UnexpectedEndOfJson) {
                    .ObjectEnd => break,
                    .String => |stringToken| {
                        const key_source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
                        var found = false;
                        inline for (structInfo.fields) |field, i| {
                            // TODO: using switches here segfault the compiler (#2727?)
                            if ((stringToken.escapes == .None and mem.eql(u8, field.name, key_source_slice)) or (stringToken.escapes == .Some and (field.name.len == stringToken.decodedLength() and encodesTo(field.name, key_source_slice)))) {
                                if (fields_seen[i]) {
                                    if (options.duplicate_field_behavior == .UseFirst) {
                                        // Keep the first value. The duplicate is
                                        // still parsed so the stream moves past
                                        // it, then released straight away.
                                        parseFree(field.field_type, try parse(field.field_type, tokens, options), options);
                                        found = true;
                                        break;
                                    } else if (options.duplicate_field_behavior == .Error) {
                                        return error.DuplicateJSONField;
                                    } else if (options.duplicate_field_behavior == .UseLast) {
                                        parseFree(field.field_type, @field(r, field.name), options);
                                        // The old value is gone; if the parse
                                        // below fails the errdefer must not
                                        // free it a second time.
                                        fields_seen[i] = false;
                                    }
                                }
                                @field(r, field.name) = try parse(field.field_type, tokens, options);
                                fields_seen[i] = true;
                                found = true;
                                break;
                            }
                        }
                        if (!found) return error.UnknownField;
                    },
                    else => return error.UnexpectedToken,
                }
            }
            // Fill in defaults for absent fields; a missing field without a
            // default is an error.
            inline for (structInfo.fields) |field, i| {
                if (!fields_seen[i]) {
                    if (field.default_value) |default| {
                        @field(r, field.name) = default;
                    } else {
                        return error.MissingField;
                    }
                }
            }
            return r;
        },
        .Array => |arrayInfo| {
            switch (token) {
                .ArrayBegin => {
                    var r: T = undefined;
                    var i: usize = 0;
                    errdefer {
                        // Only r[0..i] has been parsed: r[i] is either still
                        // undefined (its parse failed) or past the end (the
                        // closing bracket was wrong). `r.len` is comptime-known,
                        // so the guard keeps a zero-length array from being
                        // indexed at all.
                        if (r.len > 0) {
                            var k: usize = i;
                            while (k > 0) {
                                k -= 1;
                                parseFree(arrayInfo.child, r[k], options);
                            }
                        }
                    }
                    while (i < r.len) : (i += 1) {
                        r[i] = try parse(arrayInfo.child, tokens, options);
                    }
                    const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
                    switch (tok) {
                        .ArrayEnd => {},
                        else => return error.UnexpectedToken,
                    }
                    return r;
                },
                .String => |stringToken| {
                    if (arrayInfo.child != u8) return error.UnexpectedToken;
                    var r: T = undefined;
                    const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
                    // The decoded string has to fill the array exactly: a
                    // longer one would overrun `r`, a shorter one would leave
                    // an undefined tail.
                    if (r.len != stringToken.decodedLength()) return error.LengthMismatch;
                    switch (stringToken.escapes) {
                        .None => mem.copy(u8, &r, source_slice),
                        .Some => try unescapeString(&r, source_slice),
                    }
                    return r;
                },
                else => return error.UnexpectedToken,
            }
        },
        .Pointer => |ptrInfo| {
            const allocator = options.allocator orelse return error.AllocatorRequired;
            switch (ptrInfo.size) {
                .One => {
                    const r: T = try allocator.create(ptrInfo.child);
                    // Do not leak the allocation if the pointee fails to parse.
                    errdefer allocator.destroy(r);
                    r.* = try parseInternal(ptrInfo.child, token, tokens, options);
                    return r;
                },
                .Slice => {
                    switch (token) {
                        .ArrayBegin => {
                            var arraylist = std.ArrayList(ptrInfo.child).init(allocator);
                            errdefer {
                                while (arraylist.popOrNull()) |v| {
                                    parseFree(ptrInfo.child, v, options);
                                }
                                arraylist.deinit();
                            }

                            while (true) {
                                const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
                                switch (tok) {
                                    .ArrayEnd => break,
                                    else => {},
                                }

                                // Reserve the slot first so that appending the
                                // parsed element cannot fail and leak it.
                                try arraylist.ensureCapacity(arraylist.len + 1);
                                const v = try parseInternal(ptrInfo.child, tok, tokens, options);
                                arraylist.appendAssumeCapacity(v);
                            }
                            return arraylist.toOwnedSlice();
                        },
                        .String => |stringToken| {
                            if (ptrInfo.child != u8) return error.UnexpectedToken;
                            const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
                            switch (stringToken.escapes) {
                                .None => return mem.dupe(allocator, u8, source_slice),
                                .Some => {
                                    const output = try allocator.alloc(u8, stringToken.decodedLength());
                                    errdefer allocator.free(output);
                                    try unescapeString(output, source_slice);
                                    return output;
                                },
                            }
                        },
                        else => return error.UnexpectedToken,
                    }
                },
                else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
            }
        },
        else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
    }
    unreachable;
}
+
/// Reads the next JSON value from `tokens` and converts it to a `T`.
///
/// Fails with `error.UnexpectedEndOfJson` when the stream has no more tokens.
/// Pointer and slice targets are allocated with `options.allocator`; release
/// the result with `parseFree` using the same `T` and `options`.
pub fn parse(comptime T: type, tokens: *TokenStream, options: ParseOptions) !T {
    if (try tokens.next()) |first_token| {
        return parseInternal(T, first_token, tokens, options);
    }
    return error.UnexpectedEndOfJson;
}
+
/// Releases resources created by `parse`.
/// Should be called with the same type and `ParseOptions` that were passed to `parse`
///
/// Walks `value` according to `T` and frees every single pointer and slice it
/// finds, children before their container. Scalars and enums own nothing.
/// `options.allocator` must be set whenever `T` contains a pointer type;
/// untagged unions and other unsupported types are `unreachable`.
pub fn parseFree(comptime T: type, value: T, options: ParseOptions) void {
    switch (@typeInfo(T)) {
        .Bool, .Float, .ComptimeFloat, .Int, .ComptimeInt, .Enum => {},
        .Optional => {
            if (value) |v| {
                return parseFree(@TypeOf(v), v, options);
            }
        },
        .Union => |unionInfo| {
            if (unionInfo.tag_type) |UnionTagType| {
                // Only the active member holds a value that can be freed.
                inline for (unionInfo.fields) |u_field| {
                    if (@enumToInt(@as(UnionTagType, value)) == u_field.enum_field.?.value) {
                        parseFree(u_field.field_type, @field(value, u_field.name), options);
                        break;
                    }
                }
            } else {
                unreachable;
            }
        },
        .Struct => |structInfo| {
            inline for (structInfo.fields) |field| {
                parseFree(field.field_type, @field(value, field.name), options);
            }
        },
        .Array => |arrayInfo| {
            for (value) |v| {
                parseFree(arrayInfo.child, v, options);
            }
        },
        .Pointer => |ptrInfo| {
            const allocator = options.allocator orelse unreachable;
            switch (ptrInfo.size) {
                .One => {
                    // Release what the pointee owns, then the pointee itself.
                    parseFree(ptrInfo.child, value.*, options);
                    allocator.destroy(value);
                },
                .Slice => {
                    for (value) |v| {
                        parseFree(ptrInfo.child, v, options);
                    }
                    allocator.free(value);
                },
                else => unreachable,
            }
        },
        else => unreachable,
    }
}
+
test "parse" {
    // None of these targets allocate, so default options are enough.
    const opts = ParseOptions{};

    // booleans
    testing.expectEqual(false, try parse(bool, &TokenStream.init("false"), opts));
    testing.expectEqual(true, try parse(bool, &TokenStream.init("true"), opts));

    // integers, including a value too large for the target type
    testing.expectEqual(@as(u1, 1), try parse(u1, &TokenStream.init("1"), opts));
    testing.expectError(error.Overflow, parse(u1, &TokenStream.init("50"), opts));
    testing.expectEqual(@as(u64, 42), try parse(u64, &TokenStream.init("42"), opts));

    // floats
    testing.expectEqual(@as(f64, 42), try parse(f64, &TokenStream.init("42.0"), opts));

    // optionals
    testing.expectEqual(@as(?bool, null), try parse(?bool, &TokenStream.init("null"), opts));
    testing.expectEqual(@as(?bool, true), try parse(?bool, &TokenStream.init("true"), opts));

    // fixed-size byte arrays, from a string and from an array of numbers
    const foo: [3]u8 = "foo".*;
    testing.expectEqual(foo, try parse([3]u8, &TokenStream.init("\"foo\""), opts));
    testing.expectEqual(foo, try parse([3]u8, &TokenStream.init("[102, 111, 111]"), opts));
}
+
test "parse into enum" {
    const opts = ParseOptions{};
    const T = extern enum {
        Foo = 42,
        Bar,
        @"with\\escape",
    };

    // by name
    testing.expectEqual(@as(T, .Foo), try parse(T, &TokenStream.init("\"Foo\""), opts));
    // by tag value
    testing.expectEqual(@as(T, .Foo), try parse(T, &TokenStream.init("42"), opts));
    // by a name that contains an escape sequence in the JSON text
    testing.expectEqual(@as(T, .@"with\\escape"), try parse(T, &TokenStream.init("\"with\\\\escape\""), opts));

    // unknown tag value and unknown name
    testing.expectError(error.InvalidEnumTag, parse(T, &TokenStream.init("5"), opts));
    testing.expectError(error.InvalidEnumTag, parse(T, &TokenStream.init("\"Qux\""), opts));
}
+
test "parse into that allocates a slice" {
    // Without an allocator a slice target cannot be produced.
    testing.expectError(error.AllocatorRequired, parse([]u8, &TokenStream.init("\"foo\""), ParseOptions{}));

    const with_allocator = ParseOptions{ .allocator = testing.allocator };
    {
        // plain string
        const parsed = try parse([]u8, &TokenStream.init("\"foo\""), with_allocator);
        defer parseFree([]u8, parsed, with_allocator);
        testing.expectEqualSlices(u8, "foo", parsed);
    }
    {
        // array of byte values
        const parsed = try parse([]u8, &TokenStream.init("[102, 111, 111]"), with_allocator);
        defer parseFree([]u8, parsed, with_allocator);
        testing.expectEqualSlices(u8, "foo", parsed);
    }
    {
        // string whose JSON text contains an escape
        const parsed = try parse([]u8, &TokenStream.init("\"with\\\\escape\""), with_allocator);
        defer parseFree([]u8, parsed, with_allocator);
        testing.expectEqualSlices(u8, "with\\escape", parsed);
    }
}
+
test "parse into tagged union" {
    {
        // "1.5" is not an integer, so the `float` member is the first match
        const Number = union(enum) {
            int: i32,
            float: f64,
            string: []const u8,
        };
        const parsed = try parse(Number, &TokenStream.init("1.5"), ParseOptions{});
        testing.expectEqual(Number{ .float = 1.5 }, parsed);
    }

    { // if union matches string member, fails with NoUnionMembersMatched rather than AllocatorRequired
        // Note that this behaviour wasn't necessarily by design, but was
        // what fell out of the implementation and may result in interesting
        // API breakage if changed
        const Number = union(enum) {
            int: i32,
            float: f64,
            string: []const u8,
        };
        const result = parse(Number, &TokenStream.init("\"foo\""), ParseOptions{});
        testing.expectError(error.NoUnionMembersMatched, result);
    }

    { // failing allocations should be bubbled up instantly without trying next member
        var failing = testing.FailingAllocator.init(testing.allocator, 0);
        const failing_options = ParseOptions{ .allocator = &failing.allocator };
        const Bytes = union(enum) {
            // both fields here match the input
            string: []const u8,
            array: [3]u8,
        };
        const result = parse(Bytes, &TokenStream.init("[1,2,3]"), failing_options);
        testing.expectError(error.OutOfMemory, result);
    }

    {
        // if multiple matches possible, takes first option
        const Ambiguous = union(enum) {
            x: u8,
            y: u8,
        };
        const parsed = try parse(Ambiguous, &TokenStream.init("42"), ParseOptions{});
        testing.expectEqual(Ambiguous{ .x = 42 }, parsed);
    }
}
+
test "parseFree descends into tagged union" {
    // The allocator is wrapped only to count deallocations.
    var counting = testing.FailingAllocator.init(testing.allocator, 1);
    const options = ParseOptions{ .allocator = &counting.allocator };
    const T = union(enum) {
        int: i32,
        float: f64,
        string: []const u8,
    };
    const Tag = @TagType(T);

    // use a string with unicode escape so we know result can't be a reference to global constant
    const parsed = try parse(T, &TokenStream.init("\"with\\u0105unicode\""), options);
    testing.expectEqual(Tag.string, @as(Tag, parsed));
    testing.expectEqualSlices(u8, "withąunicode", parsed.string);

    // Nothing is freed until parseFree runs, which frees the one string.
    testing.expectEqual(@as(usize, 0), counting.deallocations);
    parseFree(T, parsed, options);
    testing.expectEqual(@as(usize, 1), counting.deallocations);
}
+
test "parse into struct with no fields" {
    // An empty object is the only valid input for a field-less struct.
    const Empty = struct {};
    const parsed = try parse(Empty, &TokenStream.init("{}"), ParseOptions{});
    testing.expectEqual(Empty{}, parsed);
}
+
test "parse into struct with misc fields" {
    // Matching keys against field names is unrolled at comptime.
    @setEvalBranchQuota(10000);
    const options = ParseOptions{ .allocator = testing.allocator };
    const T = struct {
        int: i64,
        float: f64,
        @"with\\escape": bool,
        @"withąunicode😂": bool,
        language: []const u8,
        optional: ?bool,
        // absent from the input below; must take its default
        default_field: i32 = 42,
        static_array: [3]f64,
        dynamic_array: []f64,

        const Bar = struct {
            nested: []const u8,
        };
        complex: Bar,

        const Baz = struct {
            foo: []const u8,
        };
        veryComplex: []Baz,

        const Union = union(enum) {
            x: u8,
            float: f64,
            string: []const u8,
        };
        a_union: Union,
    };
    const r = try parse(T, &TokenStream.init(
        \\{
        \\ "int": 420,
        \\ "float": 3.14,
        \\ "with\\escape": true,
        \\ "with\u0105unicode\ud83d\ude02": false,
        \\ "language": "zig",
        \\ "optional": null,
        \\ "static_array": [66.6, 420.420, 69.69],
        \\ "dynamic_array": [66.6, 420.420, 69.69],
        \\ "complex": {
        \\ "nested": "zig"
        \\ },
        \\ "veryComplex": [
        \\ {
        \\ "foo": "zig"
        \\ }, {
        \\ "foo": "rocks"
        \\ }
        \\ ],
        \\ "a_union": 100000
        \\}
    ), options);
    defer parseFree(T, r, options);
    testing.expectEqual(@as(i64, 420), r.int);
    testing.expectEqual(@as(f64, 3.14), r.float);
    testing.expectEqual(true, r.@"with\\escape");
    testing.expectEqual(false, r.@"withąunicode😂");
    testing.expectEqualSlices(u8, "zig", r.language);
    testing.expectEqual(@as(?bool, null), r.optional);
    testing.expectEqual(@as(i32, 42), r.default_field);
    testing.expectEqual(@as(f64, 66.6), r.static_array[0]);
    testing.expectEqual(@as(f64, 420.420), r.static_array[1]);
    testing.expectEqual(@as(f64, 69.69), r.static_array[2]);
    testing.expectEqual(@as(usize, 3), r.dynamic_array.len);
    testing.expectEqual(@as(f64, 66.6), r.dynamic_array[0]);
    testing.expectEqual(@as(f64, 420.420), r.dynamic_array[1]);
    testing.expectEqual(@as(f64, 69.69), r.dynamic_array[2]);
    // expected value first, like every other assertion in this test
    testing.expectEqualSlices(u8, "zig", r.complex.nested);
    testing.expectEqualSlices(u8, "zig", r.veryComplex[0].foo);
    testing.expectEqualSlices(u8, "rocks", r.veryComplex[1].foo);
    // 100000 does not fit `x: u8`, so the `float` member is the first match
    testing.expectEqual(T.Union{ .float = 100000 }, r.a_union);
}
+
/// A non-stream JSON parser which constructs a tree of Value's.
pub const Parser = struct {
allocator: *Allocator,