Commit 874d2dd9f7

Josh Wolfe <thejoshwolfe@gmail.com>
2023-07-10 04:18:59
std.json: add generic hash map that parses/stringifies with arbitrary string keys (#16366)
* expose innerParseFromValue
1 parent a755310
Changed files (4)
lib/std/json/hashmap.zig
@@ -0,0 +1,103 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
+const ParseOptions = @import("static.zig").ParseOptions;
+const innerParse = @import("static.zig").innerParse;
+const innerParseFromValue = @import("static.zig").innerParseFromValue;
+const Value = @import("dynamic.zig").Value;
+const StringifyOptions = @import("stringify.zig").StringifyOptions;
+const stringify = @import("stringify.zig").stringify;
+const encodeJsonString = @import("stringify.zig").encodeJsonString;
+
+/// A thin wrapper around `std.StringArrayHashMapUnmanaged` that implements
+/// `jsonParse`, `jsonParseFromValue`, and `jsonStringify`.
+/// This is useful when your JSON schema has an object with arbitrary data keys
+/// instead of comptime-known struct field names.
/// A thin wrapper around `std.StringArrayHashMapUnmanaged` that implements
/// `jsonParse`, `jsonParseFromValue`, and `jsonStringify`.
/// This is useful when your JSON schema has an object with arbitrary data keys
/// instead of comptime-known struct field names.
pub fn ArrayHashMap(comptime T: type) type {
    return struct {
        map: std.StringArrayHashMapUnmanaged(T) = .{},

        /// Frees the map's own storage. Keys/values allocated during parsing
        /// belong to the parse allocator (typically an arena) and are not
        /// individually freed here.
        pub fn deinit(self: *@This(), allocator: Allocator) void {
            self.map.deinit(allocator);
        }

        /// Streaming-parser hook: consumes one JSON object from `source` and
        /// fills a map from its string keys to values parsed as `T`.
        /// Duplicate keys are handled per `options.duplicate_field_behavior`.
        pub fn jsonParse(allocator: Allocator, source: anytype, options: ParseOptions) !@This() {
            var map = std.StringArrayHashMapUnmanaged(T){};
            errdefer map.deinit(allocator);

            if (.object_begin != try source.next()) return error.UnexpectedToken;
            while (true) {
                const token = try source.nextAlloc(allocator, .alloc_if_needed);
                switch (token) {
                    inline .string, .allocated_string => |k| {
                        const gop = try map.getOrPut(allocator, k);
                        if (gop.found_existing) {
                            // `k` duplicates a key the map already references, so it is
                            // dead now. Free it (when it was separately allocated) before
                            // recursing, in case we're using an allocator that optimizes
                            // freeing the last allocated object.
                            // NOTE: when `gop.found_existing` is false the map stores `k`
                            // itself (StringArrayHashMap does not copy keys), so freeing
                            // it there would be a use-after-free — only free duplicates.
                            if (token == .allocated_string) allocator.free(k);
                            switch (options.duplicate_field_behavior) {
                                .use_first => {
                                    // Parse and ignore the redundant value.
                                    // We don't want to skip the value, because we want type checking.
                                    _ = try innerParse(T, allocator, source, options);
                                    continue;
                                },
                                .@"error" => return error.DuplicateField,
                                .use_last => {},
                            }
                        }
                        gop.value_ptr.* = try innerParse(T, allocator, source, options);
                    },
                    .object_end => break,
                    // After object_begin the scanner only yields key strings or object_end.
                    else => unreachable,
                }
            }
            return .{ .map = map };
        }

        /// `parseFromValue` hook: converts an already-parsed `Value.object`
        /// into a typed map. A `Value.object` cannot hold duplicate keys,
        /// so no duplicate handling is needed here.
        pub fn jsonParseFromValue(allocator: Allocator, source: Value, options: ParseOptions) !@This() {
            if (source != .object) return error.UnexpectedToken;

            var map = std.StringArrayHashMapUnmanaged(T){};
            errdefer map.deinit(allocator);

            var it = source.object.iterator();
            while (it.next()) |kv| {
                try map.put(allocator, kv.key_ptr.*, try innerParseFromValue(T, allocator, kv.value_ptr.*, options));
            }
            return .{ .map = map };
        }

        /// Stringify hook: writes the map as a JSON object in insertion order,
        /// honoring the whitespace settings carried in `options`.
        pub fn jsonStringify(self: @This(), options: StringifyOptions, out_stream: anytype) !void {
            try out_stream.writeByte('{');
            var field_output = false;
            var child_options = options;
            child_options.whitespace.indent_level += 1;
            var it = self.map.iterator();
            while (it.next()) |kv| {
                // Comma before every entry except the first.
                if (!field_output) {
                    field_output = true;
                } else {
                    try out_stream.writeByte(',');
                }
                try child_options.whitespace.outputIndent(out_stream);
                try encodeJsonString(kv.key_ptr.*, options, out_stream);
                try out_stream.writeByte(':');
                if (child_options.whitespace.separator) {
                    try out_stream.writeByte(' ');
                }
                try stringify(kv.value_ptr.*, child_options, out_stream);
            }
            if (field_output) {
                // Close brace goes on its own indented line only when at least one
                // field was written; an empty map stays as `{}`.
                try options.whitespace.outputIndent(out_stream);
            }
            try out_stream.writeByte('}');
        }
    };
}
+
test {
    // Reference the companion test file so `zig test` on this module runs
    // the ArrayHashMap behavior tests as well.
    _ = @import("hashmap_test.zig");
}
lib/std/json/hashmap_test.zig
@@ -0,0 +1,139 @@
+const std = @import("std");
+const testing = std.testing;
+
+const ArrayHashMap = @import("hashmap.zig").ArrayHashMap;
+
+const parseFromSlice = @import("static.zig").parseFromSlice;
+const parseFromSliceLeaky = @import("static.zig").parseFromSliceLeaky;
+const parseFromValue = @import("static.zig").parseFromValue;
+const stringifyAlloc = @import("stringify.zig").stringifyAlloc;
+const Value = @import("dynamic.zig").Value;
+
// Element type shared by all tests below: one numeric and one string field,
// exercising both scalar parsing and allocated-string parsing.
// Field order matters: stringify emits fields in declaration order, and the
// expected JSON strings in the tests depend on "i" preceding "s".
const T = struct {
    i: i32,
    s: []const u8,
};
+
// Parsing a JSON object with arbitrary string keys yields a map whose
// entries mirror the document.
test "parse json hashmap" {
    const document =
        \\{
        \\  "abc": {"i": 0, "s": "d"},
        \\  "xyz": {"i": 1, "s": "w"}
        \\}
    ;
    const result = try parseFromSlice(ArrayHashMap(T), testing.allocator, document, .{});
    defer result.deinit();

    const entries = result.value.map;
    try testing.expectEqual(@as(usize, 2), entries.count());
    try testing.expectEqualStrings("d", entries.get("abc").?.s);
    try testing.expectEqual(@as(i32, 1), entries.get("xyz").?.i);
}
+
// Each duplicate_field_behavior setting handles a repeated "abc" key differently.
test "parse json hashmap duplicate fields" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    const document =
        \\{
        \\  "abc": {"i": 0, "s": "d"},
        \\  "abc": {"i": 1, "s": "w"}
        \\}
    ;

    // .@"error": a repeated key rejects the whole document.
    try testing.expectError(error.DuplicateField, parseFromSliceLeaky(ArrayHashMap(T), arena, document, .{
        .duplicate_field_behavior = .@"error",
    }));

    // .use_first: the earliest value for "abc" wins.
    const kept_first = try parseFromSliceLeaky(ArrayHashMap(T), arena, document, .{
        .duplicate_field_behavior = .use_first,
    });
    try testing.expectEqual(@as(usize, 1), kept_first.map.count());
    try testing.expectEqual(@as(i32, 0), kept_first.map.get("abc").?.i);

    // .use_last: the latest value for "abc" wins.
    const kept_last = try parseFromSliceLeaky(ArrayHashMap(T), arena, document, .{
        .duplicate_field_behavior = .use_last,
    });
    try testing.expectEqual(@as(usize, 1), kept_last.map.count());
    try testing.expectEqual(@as(i32, 1), kept_last.map.get("abc").?.i);
}
+
// Stringify output tracks live mutations of the map:
// empty -> two entries -> one entry -> empty again.
test "stringify json hashmap" {
    var subject = ArrayHashMap(T){};
    defer subject.deinit(testing.allocator);
    {
        const json = try stringifyAlloc(testing.allocator, subject, .{});
        defer testing.allocator.free(json);
        try testing.expectEqualStrings("{}", json);
    }

    try subject.map.put(testing.allocator, "abc", .{ .i = 0, .s = "d" });
    try subject.map.put(testing.allocator, "xyz", .{ .i = 1, .s = "w" });

    {
        const json = try stringifyAlloc(testing.allocator, subject, .{});
        defer testing.allocator.free(json);
        try testing.expectEqualStrings(
            \\{"abc":{"i":0,"s":"d"},"xyz":{"i":1,"s":"w"}}
        , json);
    }

    try testing.expect(subject.map.swapRemove("abc"));
    {
        const json = try stringifyAlloc(testing.allocator, subject, .{});
        defer testing.allocator.free(json);
        try testing.expectEqualStrings(
            \\{"xyz":{"i":1,"s":"w"}}
        , json);
    }

    try testing.expect(subject.map.swapRemove("xyz"));
    {
        const json = try stringifyAlloc(testing.allocator, subject, .{});
        defer testing.allocator.free(json);
        try testing.expectEqualStrings("{}", json);
    }
}
+
// With 2-space indentation requested, the map pretty-prints like any object.
test "stringify json hashmap whitespace" {
    var subject = ArrayHashMap(T){};
    defer subject.deinit(testing.allocator);
    try subject.map.put(testing.allocator, "abc", .{ .i = 0, .s = "d" });
    try subject.map.put(testing.allocator, "xyz", .{ .i = 1, .s = "w" });

    const json = try stringifyAlloc(testing.allocator, subject, .{
        .whitespace = .{
            .indent = .{ .space = 2 },
        },
    });
    defer testing.allocator.free(json);
    try testing.expectEqualStrings(
        \\{
        \\  "abc": {
        \\    "i": 0,
        \\    "s": "d"
        \\  },
        \\  "xyz": {
        \\    "i": 1,
        \\    "s": "w"
        \\  }
        \\}
    , json);
}
+
// An already-parsed dynamic Value can be re-parsed into a typed hashmap.
test "json parse from value hashmap" {
    const document =
        \\{
        \\  "abc": {"i": 0, "s": "d"},
        \\  "xyz": {"i": 1, "s": "w"}
        \\}
    ;
    const dynamic = try parseFromSlice(Value, testing.allocator, document, .{});
    defer dynamic.deinit();

    const typed = try parseFromValue(ArrayHashMap(T), testing.allocator, dynamic.value, .{});
    defer typed.deinit();

    try testing.expectEqualStrings("d", typed.value.map.get("abc").?.s);
}
lib/std/json/static.zig
@@ -145,6 +145,7 @@ pub fn parseFromTokenSourceLeaky(
 }
 
 /// Like `parseFromSlice`, but the input is an already-parsed `std.json.Value` object.
+/// Only `options.ignore_unknown_fields` is used from `options`.
 pub fn parseFromValue(
     comptime T: type,
     allocator: Allocator,
@@ -173,7 +174,7 @@ pub fn parseFromValueLeaky(
     // I guess this function doesn't need to exist,
     // but the flow of the sourcecode is easy to follow and grouped nicely with
     // this pub redirect function near the top and the implementation near the bottom.
-    return internalParseFromValue(T, allocator, source, options);
+    return innerParseFromValue(T, allocator, source, options);
 }
 
 /// The error set that will be returned when parsing from `*Source`.
@@ -199,7 +200,7 @@ pub const ParseFromValueError = std.fmt.ParseIntError || std.fmt.ParseFloatError
 /// during the implementation of `parseFromTokenSourceLeaky` and similar.
 /// It is exposed primarily to enable custom `jsonParse()` methods to call back into the `parseFrom*` system,
 /// such as if you're implementing a custom container of type `T`;
-/// you can call `internalParse(T, ...)` for each of the container's items.
+/// you can call `innerParse(T, ...)` for each of the container's items.
 /// Note that `null` fields are not allowed on the `options` when calling this function.
 /// (The `options` you get in your `jsonParse` method has no `null` fields.)
 pub fn innerParse(
@@ -528,7 +529,12 @@ fn internalParseArray(
     return r;
 }
 
-fn internalParseFromValue(
+/// This is an internal function called recursively
+/// during the implementation of `parseFromValueLeaky`.
+/// It is exposed primarily to enable custom `jsonParseFromValue()` methods to call back into the `parseFromValue*` system,
+/// such as if you're implementing a custom container of type `T`;
+/// you can call `innerParseFromValue(T, ...)` for each of the container's items.
+pub fn innerParseFromValue(
     comptime T: type,
     allocator: Allocator,
     source: Value,
@@ -571,7 +577,7 @@ fn internalParseFromValue(
         .Optional => |optionalInfo| {
             switch (source) {
                 .null => return null,
-                else => return try internalParseFromValue(optionalInfo.child, allocator, source, options),
+                else => return try innerParseFromValue(optionalInfo.child, allocator, source, options),
             }
         },
         .Enum => {
@@ -609,7 +615,7 @@ fn internalParseFromValue(
                         return @unionInit(T, u_field.name, {});
                     }
                     // Recurse.
-                    return @unionInit(T, u_field.name, try internalParseFromValue(u_field.type, allocator, kv.value_ptr.*, options));
+                    return @unionInit(T, u_field.name, try innerParseFromValue(u_field.type, allocator, kv.value_ptr.*, options));
                 }
             }
             // Didn't match anything.
@@ -623,7 +629,7 @@ fn internalParseFromValue(
 
                 var r: T = undefined;
                 inline for (0..structInfo.fields.len, source.array.items) |i, item| {
-                    r[i] = try internalParseFromValue(structInfo.fields[i].type, allocator, item, options);
+                    r[i] = try innerParseFromValue(structInfo.fields[i].type, allocator, item, options);
                 }
 
                 return r;
@@ -645,19 +651,8 @@ fn internalParseFromValue(
                 inline for (structInfo.fields, 0..) |field, i| {
                     if (field.is_comptime) @compileError("comptime fields are not supported: " ++ @typeName(T) ++ "." ++ field.name);
                     if (std.mem.eql(u8, field.name, field_name)) {
-                        if (fields_seen[i]) {
-                            switch (options.duplicate_field_behavior) {
-                                .use_first => {
-                                    // Parse and ignore the redundant value.
-                                    // We don't want to skip the value, because we want type checking.
-                                    _ = try internalParseFromValue(field.type, allocator, kv.value_ptr.*, options);
-                                    break;
-                                },
-                                .@"error" => return error.DuplicateField,
-                                .use_last => {},
-                            }
-                        }
-                        @field(r, field.name) = try internalParseFromValue(field.type, allocator, kv.value_ptr.*, options);
+                        assert(!fields_seen[i]); // Can't have duplicate keys in a Value.object.
+                        @field(r, field.name) = try innerParseFromValue(field.type, allocator, kv.value_ptr.*, options);
                         fields_seen[i] = true;
                         break;
                     }
@@ -674,7 +669,7 @@ fn internalParseFromValue(
             switch (source) {
                 .array => |array| {
                     // Typical array.
-                    return internalParseArrayFromArrayValue(T, arrayInfo.child, arrayInfo.len, allocator, array, options);
+                    return innerParseArrayFromArrayValue(T, arrayInfo.child, arrayInfo.len, allocator, array, options);
                 },
                 .string => |s| {
                     if (arrayInfo.child != u8) return error.UnexpectedToken;
@@ -694,7 +689,7 @@ fn internalParseFromValue(
         .Vector => |vecInfo| {
             switch (source) {
                 .array => |array| {
-                    return internalParseArrayFromArrayValue(T, vecInfo.child, vecInfo.len, allocator, array, options);
+                    return innerParseArrayFromArrayValue(T, vecInfo.child, vecInfo.len, allocator, array, options);
                 },
                 else => return error.UnexpectedToken,
             }
@@ -704,7 +699,7 @@ fn internalParseFromValue(
             switch (ptrInfo.size) {
                 .One => {
                     const r: *ptrInfo.child = try allocator.create(ptrInfo.child);
-                    r.* = try internalParseFromValue(ptrInfo.child, allocator, source, options);
+                    r.* = try innerParseFromValue(ptrInfo.child, allocator, source, options);
                     return r;
                 },
                 .Slice => {
@@ -716,7 +711,7 @@ fn internalParseFromValue(
                                 try allocator.alloc(ptrInfo.child, array.items.len);
 
                             for (array.items, r) |item, *dest| {
-                                dest.* = try internalParseFromValue(ptrInfo.child, allocator, item, options);
+                                dest.* = try innerParseFromValue(ptrInfo.child, allocator, item, options);
                             }
 
                             return r;
@@ -743,7 +738,7 @@ fn internalParseFromValue(
     }
 }
 
-fn internalParseArrayFromArrayValue(
+fn innerParseArrayFromArrayValue(
     comptime T: type,
     comptime Child: type,
     comptime len: comptime_int,
@@ -755,7 +750,7 @@ fn internalParseArrayFromArrayValue(
 
     var r: T = undefined;
     for (array.items, 0..) |item, i| {
-        r[i] = try internalParseFromValue(Child, allocator, item, options);
+        r[i] = try innerParseFromValue(Child, allocator, item, options);
     }
 
     return r;
lib/std/json.zig
@@ -69,6 +69,8 @@ pub const ObjectMap = @import("json/dynamic.zig").ObjectMap;
 pub const Array = @import("json/dynamic.zig").Array;
 pub const Value = @import("json/dynamic.zig").Value;
 
+pub const ArrayHashMap = @import("json/hashmap.zig").ArrayHashMap;
+
 pub const validate = @import("json/scanner.zig").validate;
 pub const Error = @import("json/scanner.zig").Error;
 pub const reader = @import("json/scanner.zig").reader;
@@ -91,6 +93,7 @@ pub const parseFromTokenSourceLeaky = @import("json/static.zig").parseFromTokenS
 pub const innerParse = @import("json/static.zig").innerParse;
 pub const parseFromValue = @import("json/static.zig").parseFromValue;
 pub const parseFromValueLeaky = @import("json/static.zig").parseFromValueLeaky;
+pub const innerParseFromValue = @import("json/static.zig").innerParseFromValue;
 pub const ParseError = @import("json/static.zig").ParseError;
 pub const ParseFromValueError = @import("json/static.zig").ParseFromValueError;
 
@@ -116,6 +119,7 @@ test {
     _ = @import("json/scanner.zig");
     _ = @import("json/write_stream.zig");
     _ = @import("json/dynamic.zig");
+    _ = @import("json/hashmap_test.zig");
     _ = @import("json/static.zig");
     _ = @import("json/stringify.zig");
     _ = @import("json/JSONTestSuite_test.zig");