Commit febfcbd49d

Josh Wolfe <thejoshwolfe@gmail.com>
2024-08-22 14:26:14
std.json.WriteStream supports streaming long values directly to the underlying stream (#21155)
1 parent 31220b5
Changed files (2)
lib/std/json/stringify.zig
@@ -156,36 +156,23 @@ pub fn writeStreamArbitraryDepth(
 ///    | <array>
 ///    | write
 ///    | print
+///    | <writeRawStream>
 ///  <object> = beginObject ( <field> <value> )* endObject
-///  <field> = objectField | objectFieldRaw
+///  <field> = objectField | objectFieldRaw | <objectFieldRawStream>
 ///  <array> = beginArray ( <value> )* endArray
+///  <writeRawStream> = beginWriteRaw ( stream.writeAll )* endWriteRaw
+///  <objectFieldRawStream> = beginObjectFieldRaw ( stream.writeAll )* endObjectFieldRaw
 /// ```
 ///
-/// Supported types:
-///  * Zig `bool` -> JSON `true` or `false`.
-///  * Zig `?T` -> `null` or the rendering of `T`.
-///  * Zig `i32`, `u64`, etc. -> JSON number or string.
-///      * When option `emit_nonportable_numbers_as_strings` is true, if the value is outside the range `+-1<<53` (the precise integer range of f64), it is rendered as a JSON string in base 10. Otherwise, it is rendered as JSON number.
-///  * Zig floats -> JSON number or string.
-///      * If the value cannot be precisely represented by an f64, it is rendered as a JSON string. Otherwise, it is rendered as JSON number.
-///      * TODO: Float rendering will likely change in the future, e.g. to remove the unnecessary "e+00".
-///  * Zig `[]const u8`, `[]u8`, `*[N]u8`, `@Vector(N, u8)`, and similar -> JSON string.
-///      * See `StringifyOptions.emit_strings_as_arrays`.
-///      * If the content is not valid UTF-8, rendered as an array of numbers instead.
-///  * Zig `[]T`, `[N]T`, `*[N]T`, `@Vector(N, T)`, and similar -> JSON array of the rendering of each item.
-///  * Zig tuple -> JSON array of the rendering of each item.
-///  * Zig `struct` -> JSON object with each field in declaration order.
-///      * If the struct declares a method `pub fn jsonStringify(self: *@This(), jw: anytype) !void`, it is called to do the serialization instead of the default behavior. The given `jw` is a pointer to this `WriteStream`. See `std.json.Value` for an example.
-///      * See `StringifyOptions.emit_null_optional_fields`.
-///  * Zig `union(enum)` -> JSON object with one field named for the active tag and a value representing the payload.
-///      * If the payload is `void`, then the emitted value is `{}`.
-///      * If the union declares a method `pub fn jsonStringify(self: *@This(), jw: anytype) !void`, it is called to do the serialization instead of the default behavior. The given `jw` is a pointer to this `WriteStream`.
-///  * Zig `enum` -> JSON string naming the active tag.
-///      * If the enum declares a method `pub fn jsonStringify(self: *@This(), jw: anytype) !void`, it is called to do the serialization instead of the default behavior. The given `jw` is a pointer to this `WriteStream`.
-///  * Zig untyped enum literal -> JSON string naming the active tag.
-///  * Zig error -> JSON string naming the error.
-///  * Zig `*T` -> the rendering of `T`. Note there is no guard against circular-reference infinite recursion.
-///
+/// The `safety_checks_hint` parameter determines how much memory is used to enable assertions that the above grammar is being followed,
+/// e.g. tripping an assertion rather than allowing `endObject` to emit the final `}` in `[[[]]}`.
+/// "Depth" in this context means the depth of nested `[]` or `{}` expressions
+/// (or equivalently the amount of recursion on the `<value>` grammar expression above).
+/// For example, emitting the JSON `[[[]]]` requires a depth of 3.
+/// If `.checked_to_fixed_depth` is used, there is additionally an assertion that the nesting depth never exceeds the given limit.
+/// `.checked_to_arbitrary_depth` requires a runtime allocator for the memory.
+/// `.checked_to_fixed_depth` embeds the storage required in the `WriteStream` struct.
+/// `.assumed_correct` requires no space and performs none of these assertions.
 /// In `ReleaseFast` and `ReleaseSmall` mode, the given `safety_checks_hint` is ignored and is always treated as `.assumed_correct`.
 pub fn WriteStream(
     comptime OutStream: type,
@@ -197,10 +184,14 @@ pub fn WriteStream(
 ) type {
     return struct {
         const Self = @This();
-        const safety_checks: @TypeOf(safety_checks_hint) = switch (@import("builtin").mode) {
-            .Debug, .ReleaseSafe => safety_checks_hint,
-            .ReleaseFast, .ReleaseSmall => .assumed_correct,
+        const build_mode_has_safety = switch (@import("builtin").mode) {
+            .Debug, .ReleaseSafe => true,
+            .ReleaseFast, .ReleaseSmall => false,
         };
+        const safety_checks: @TypeOf(safety_checks_hint) = if (build_mode_has_safety)
+            safety_checks_hint
+        else
+            .assumed_correct;
 
         pub const Stream = OutStream;
         pub const Error = switch (safety_checks) {
@@ -225,6 +216,11 @@ pub fn WriteStream(
             .assumed_correct => void,
         },
 
+        raw_streaming_mode: if (build_mode_has_safety)
+            enum { none, value, objectField }
+        else
+            void = if (build_mode_has_safety) .none else {},
+
         pub fn init(safety_allocator: Allocator, stream: OutStream, options: StringifyOptions) Self {
             return .{
                 .options = options,
@@ -237,6 +233,7 @@ pub fn WriteStream(
             };
         }
 
+        /// Only necessary with `.checked_to_arbitrary_depth`.
         pub fn deinit(self: *Self) void {
             switch (safety_checks) {
                 .checked_to_arbitrary_depth => self.nesting_stack.deinit(),
@@ -246,6 +243,7 @@ pub fn WriteStream(
         }
 
         pub fn beginArray(self: *Self) Error!void {
+            if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
             try self.valueStart();
             try self.stream.writeByte('[');
             try self.pushIndentation(ARRAY_MODE);
@@ -253,6 +251,7 @@ pub fn WriteStream(
         }
 
         pub fn beginObject(self: *Self) Error!void {
+            if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
             try self.valueStart();
             try self.stream.writeByte('{');
             try self.pushIndentation(OBJECT_MODE);
@@ -260,6 +259,7 @@ pub fn WriteStream(
         }
 
         pub fn endArray(self: *Self) Error!void {
+            if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
             self.popIndentation(ARRAY_MODE);
             switch (self.next_punctuation) {
                 .none => {},
@@ -273,6 +273,7 @@ pub fn WriteStream(
         }
 
         pub fn endObject(self: *Self) Error!void {
+            if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
             self.popIndentation(OBJECT_MODE);
             switch (self.next_punctuation) {
                 .none => {},
@@ -389,16 +390,39 @@ pub fn WriteStream(
         /// e.g. `"1"`, `"[]"`, `"[1,2]"`, not `"1,2"`.
         /// This function may be useful for doing your own number formatting.
         pub fn print(self: *Self, comptime fmt: []const u8, args: anytype) Error!void {
+            if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
             try self.valueStart();
             try self.stream.print(fmt, args);
             self.valueDone();
         }
 
+        /// Starts a raw value: an alternative to `write` that lets you write the value's bytes straight to the `.stream` field, e.g. with `.stream.writeAll()`.
+        /// After this returns, write exactly one complete JSON value (including the quotes, if it is a string) to `.stream`, then call `endWriteRaw()`.
+        /// Until then the other writing methods assert (in builds with safety checks) that no raw value is in progress. Fails with `Error` if `valueStart` fails.
+        /// This can be useful for streaming very long strings into the output without needing it all buffered in memory.
+        pub fn beginWriteRaw(self: *Self) Error!void {
+            if (build_mode_has_safety) {
+                assert(self.raw_streaming_mode == .none);
+                self.raw_streaming_mode = .value; // `endWriteRaw` asserts this exact mode before resetting it
+            }
+            try self.valueStart();
+        }
+
+        /// Ends the raw value started by `beginWriteRaw`; call this once the complete value has been written to `.stream`.
+        pub fn endWriteRaw(self: *Self) void {
+            // `valueDone` still runs last, after the mode check below, just as in a straight-line version.
+            defer self.valueDone();
+            if (!build_mode_has_safety) return;
+            assert(self.raw_streaming_mode == .value);
+            self.raw_streaming_mode = .none;
+        }
+
         /// See `WriteStream` for when to call this method.
         /// `key` is the string content of the property name.
         /// Surrounding quotes will be added and any special characters will be escaped.
         /// See also `objectFieldRaw`.
         pub fn objectField(self: *Self, key: []const u8) Error!void {
+            if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
             try self.objectFieldStart();
             try encodeJsonString(key, self.options, self.stream);
             self.next_punctuation = .colon;
@@ -408,14 +432,65 @@ pub fn WriteStream(
         /// A few assertions are performed on the given value to ensure that the caller of this function understands the API contract.
         /// See also `objectField`.
         pub fn objectFieldRaw(self: *Self, quoted_key: []const u8) Error!void {
+            if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
             assert(quoted_key.len >= 2 and quoted_key[0] == '"' and quoted_key[quoted_key.len - 1] == '"'); // quoted_key should be "quoted".
             try self.objectFieldStart();
             try self.stream.writeAll(quoted_key);
             self.next_punctuation = .colon;
         }
 
-        /// See `WriteStream`.
+        /// In the rare case that you need to write very long object field names,
+        /// this is an alternative to `objectField` and `objectFieldRaw` that lets you write the name directly to the `.stream` field,
+        /// similar to `beginWriteRaw`. Write the complete field name to `.stream`, surrounding quotes included, the way `objectFieldRaw` expects its argument.
+        /// Call `endObjectFieldRaw()` when you're done. Fails with `Error` if `objectFieldStart` fails.
+        pub fn beginObjectFieldRaw(self: *Self) Error!void {
+            if (build_mode_has_safety) {
+                assert(self.raw_streaming_mode == .none);
+                self.raw_streaming_mode = .objectField; // `endObjectFieldRaw` asserts this exact mode before resetting it
+            }
+            try self.objectFieldStart();
+        }
+
+        /// Completes a field name begun with `beginObjectFieldRaw`; call this once the whole quoted name has been written to `.stream`.
+        pub fn endObjectFieldRaw(self: *Self) void {
+            // Same follow-up as `objectField` and `objectFieldRaw`: after the name, `:` is the next punctuation.
+            defer self.next_punctuation = .colon;
+            if (!build_mode_has_safety) return;
+            assert(self.raw_streaming_mode == .objectField);
+            self.raw_streaming_mode = .none;
+        }
+
+        /// Renders the given Zig value as JSON.
+        ///
+        /// Supported types:
+        ///  * Zig `bool` -> JSON `true` or `false`.
+        ///  * Zig `?T` -> `null` or the rendering of `T`.
+        ///  * Zig `i32`, `u64`, etc. -> JSON number or string.
+        ///      * When option `emit_nonportable_numbers_as_strings` is true, if the value is outside the range `+-1<<53` (the precise integer range of f64), it is rendered as a JSON string in base 10. Otherwise, it is rendered as JSON number.
+        ///  * Zig floats -> JSON number or string.
+        ///      * If the value cannot be precisely represented by an f64, it is rendered as a JSON string. Otherwise, it is rendered as JSON number.
+        ///      * TODO: Float rendering will likely change in the future, e.g. to remove the unnecessary "e+00".
+        ///  * Zig `[]const u8`, `[]u8`, `*[N]u8`, `@Vector(N, u8)`, and similar -> JSON string.
+        ///      * See `StringifyOptions.emit_strings_as_arrays`.
+        ///      * If the content is not valid UTF-8, rendered as an array of numbers instead.
+        ///  * Zig `[]T`, `[N]T`, `*[N]T`, `@Vector(N, T)`, and similar -> JSON array of the rendering of each item.
+        ///  * Zig tuple -> JSON array of the rendering of each item.
+        ///  * Zig `struct` -> JSON object with each field in declaration order.
+        ///      * If the struct declares a method `pub fn jsonStringify(self: *@This(), jw: anytype) !void`, it is called to do the serialization instead of the default behavior. The given `jw` is a pointer to this `WriteStream`. See `std.json.Value` for an example.
+        ///      * See `StringifyOptions.emit_null_optional_fields`.
+        ///  * Zig `union(enum)` -> JSON object with one field named for the active tag and a value representing the payload.
+        ///      * If the payload is `void`, then the emitted value is `{}`.
+        ///      * If the union declares a method `pub fn jsonStringify(self: *@This(), jw: anytype) !void`, it is called to do the serialization instead of the default behavior. The given `jw` is a pointer to this `WriteStream`.
+        ///  * Zig `enum` -> JSON string naming the active tag.
+        ///      * If the enum declares a method `pub fn jsonStringify(self: *@This(), jw: anytype) !void`, it is called to do the serialization instead of the default behavior. The given `jw` is a pointer to this `WriteStream`.
+        ///  * Zig untyped enum literal -> JSON string naming the active tag.
+        ///  * Zig error -> JSON string naming the error.
+        ///  * Zig `*T` -> the rendering of `T`. Note there is no guard against circular-reference infinite recursion.
+        ///
+        /// See also alternative functions `print` and `beginWriteRaw`.
+        /// For writing object field names, use `objectField` instead.
         pub fn write(self: *Self, value: anytype) Error!void {
+            if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
             const T = @TypeOf(value);
             switch (@typeInfo(T)) {
                 .Int => {
lib/std/json/stringify_test.zig
@@ -443,3 +443,53 @@ test "nonportable numbers" {
     try testStringify("9999999999999999", 9999999999999999, .{});
     try testStringify("\"9999999999999999\"", 9999999999999999, .{ .emit_nonportable_numbers_as_strings = true });
 }
+
+test "stringify raw streaming" {
+    var buffer: [1024]u8 = undefined;
+    var fbs = std.io.fixedBufferStream(&buffer);
+    const writer = fbs.writer();
+    // The same scenario is replayed against each way of constructing a write stream exercised here.
+    {
+        var jw = writeStream(writer, .{ .whitespace = .indent_2 });
+        try testRawStreaming(&jw, &fbs);
+    }
+    // Explicit maximum nesting depth of 8.
+    {
+        var jw = writeStreamMaxDepth(writer, .{ .whitespace = .indent_2 }, 8);
+        try testRawStreaming(&jw, &fbs);
+    }
+    // `null` passed in place of a maximum depth.
+    {
+        var jw = writeStreamMaxDepth(writer, .{ .whitespace = .indent_2 }, null);
+        try testRawStreaming(&jw, &fbs);
+    }
+    // Allocator-backed variant: the only one in this test that is paired with `deinit`.
+    {
+        var jw = writeStreamArbitraryDepth(testing.allocator, writer, .{ .whitespace = .indent_2 });
+        defer jw.deinit();
+        try testRawStreaming(&jw, &fbs);
+    }
+}
+
+/// Streams one object with a raw field name and a raw value through `w`, then compares the bytes captured by `slice_stream`.
+fn testRawStreaming(w: anytype, slice_stream: anytype) !void {
+    const expected =
+        \\{
+        \\  "long key": "long value"
+        \\}
+    ;
+    slice_stream.reset(); // the caller reuses one buffer across several writers
+    try w.beginObject();
+    // Field name, streamed in two pieces; the quotes are written by hand.
+    try w.beginObjectFieldRaw();
+    try w.stream.writeAll("\"long");
+    try w.stream.writeAll(" key\"");
+    w.endObjectFieldRaw();
+    // Field value, streamed the same way.
+    try w.beginWriteRaw();
+    try w.stream.writeAll("\"long");
+    try w.stream.writeAll(" value\"");
+    w.endWriteRaw();
+    try w.endObject();
+    try std.testing.expectEqualStrings(expected, slice_stream.getWritten());
+}