Commit c30df072bd
Changed files (14)
lib/std/json/dynamic.zig
@@ -4,17 +4,12 @@ const ArenaAllocator = std.heap.ArenaAllocator;
const ArrayList = std.ArrayList;
const StringArrayHashMap = std.StringArrayHashMap;
const Allocator = std.mem.Allocator;
-
-const StringifyOptions = @import("./stringify.zig").StringifyOptions;
-const stringify = @import("./stringify.zig").stringify;
+const json = std.json;
const ParseOptions = @import("./static.zig").ParseOptions;
const ParseError = @import("./static.zig").ParseError;
-const JsonScanner = @import("./scanner.zig").Scanner;
-const AllocWhen = @import("./scanner.zig").AllocWhen;
-const Token = @import("./scanner.zig").Token;
-const isNumberFormattedLikeAnInteger = @import("./scanner.zig").isNumberFormattedLikeAnInteger;
+const isNumberFormattedLikeAnInteger = @import("Scanner.zig").isNumberFormattedLikeAnInteger;
pub const ObjectMap = StringArrayHashMap(Value);
pub const Array = ArrayList(Value);
@@ -52,12 +47,11 @@ pub const Value = union(enum) {
}
}
- pub fn dump(self: Value) void {
- std.debug.lockStdErr();
- defer std.debug.unlockStdErr();
+ pub fn dump(v: Value) void {
+ const w = std.debug.lockStderrWriter(&.{});
+ defer std.debug.unlockStderrWriter();
- const stderr = std.fs.File.stderr().deprecatedWriter();
- stringify(self, .{}, stderr) catch return;
+ json.Stringify.value(v, .{}, w) catch return;
}
pub fn jsonStringify(value: @This(), jws: anytype) !void {
lib/std/json/dynamic_test.zig
@@ -1,8 +1,10 @@
const std = @import("std");
+const json = std.json;
const mem = std.mem;
const testing = std.testing;
const ArenaAllocator = std.heap.ArenaAllocator;
const Allocator = std.mem.Allocator;
+const Writer = std.io.Writer;
const ObjectMap = @import("dynamic.zig").ObjectMap;
const Array = @import("dynamic.zig").Array;
@@ -14,8 +16,7 @@ const parseFromTokenSource = @import("static.zig").parseFromTokenSource;
const parseFromValueLeaky = @import("static.zig").parseFromValueLeaky;
const ParseOptions = @import("static.zig").ParseOptions;
-const jsonReader = @import("scanner.zig").reader;
-const JsonReader = @import("scanner.zig").Reader;
+const Scanner = @import("Scanner.zig");
test "json.parser.dynamic" {
const s =
@@ -70,14 +71,10 @@ test "json.parser.dynamic" {
try testing.expect(mem.eql(u8, large_int.number_string, "18446744073709551615"));
}
-const writeStream = @import("./stringify.zig").writeStream;
test "write json then parse it" {
var out_buffer: [1000]u8 = undefined;
-
- var fixed_buffer_stream = std.io.fixedBufferStream(&out_buffer);
- const out_stream = fixed_buffer_stream.writer();
- var jw = writeStream(out_stream, .{});
- defer jw.deinit();
+ var fixed_writer: Writer = .fixed(&out_buffer);
+ var jw: json.Stringify = .{ .writer = &fixed_writer, .options = .{} };
try jw.beginObject();
@@ -101,8 +98,8 @@ test "write json then parse it" {
try jw.endObject();
- fixed_buffer_stream = std.io.fixedBufferStream(fixed_buffer_stream.getWritten());
- var json_reader = jsonReader(testing.allocator, fixed_buffer_stream.reader());
+ var fbs: std.Io.Reader = .fixed(fixed_writer.buffered());
+ var json_reader: Scanner.Reader = .init(testing.allocator, &fbs);
defer json_reader.deinit();
var parsed = try parseFromTokenSource(Value, testing.allocator, &json_reader, .{});
defer parsed.deinit();
@@ -242,10 +239,9 @@ test "Value.jsonStringify" {
.{ .object = obj },
};
var buffer: [0x1000]u8 = undefined;
- var fbs = std.io.fixedBufferStream(&buffer);
+ var fixed_writer: Writer = .fixed(&buffer);
- var jw = writeStream(fbs.writer(), .{ .whitespace = .indent_1 });
- defer jw.deinit();
+ var jw: json.Stringify = .{ .writer = &fixed_writer, .options = .{ .whitespace = .indent_1 } };
try jw.write(array);
const expected =
@@ -266,7 +262,7 @@ test "Value.jsonStringify" {
\\ }
\\]
;
- try testing.expectEqualStrings(expected, fbs.getWritten());
+ try testing.expectEqualStrings(expected, fixed_writer.buffered());
}
test "parseFromValue(std.json.Value,...)" {
@@ -334,8 +330,8 @@ test "polymorphic parsing" {
test "long object value" {
const value = "01234567890123456789";
const doc = "{\"key\":\"" ++ value ++ "\"}";
- var fbs = std.io.fixedBufferStream(doc);
- var reader = smallBufferJsonReader(testing.allocator, fbs.reader());
+ var fbs: std.Io.Reader = .fixed(doc);
+ var reader = smallBufferJsonReader(testing.allocator, &fbs);
defer reader.deinit();
var parsed = try parseFromTokenSource(Value, testing.allocator, &reader, .{});
defer parsed.deinit();
@@ -367,8 +363,8 @@ test "many object keys" {
\\ "k5": "v5"
\\}
;
- var fbs = std.io.fixedBufferStream(doc);
- var reader = smallBufferJsonReader(testing.allocator, fbs.reader());
+ var fbs: std.Io.Reader = .fixed(doc);
+ var reader = smallBufferJsonReader(testing.allocator, &fbs);
defer reader.deinit();
var parsed = try parseFromTokenSource(Value, testing.allocator, &reader, .{});
defer parsed.deinit();
@@ -382,8 +378,8 @@ test "many object keys" {
test "negative zero" {
const doc = "-0";
- var fbs = std.io.fixedBufferStream(doc);
- var reader = smallBufferJsonReader(testing.allocator, fbs.reader());
+ var fbs: std.Io.Reader = .fixed(doc);
+ var reader = smallBufferJsonReader(testing.allocator, &fbs);
defer reader.deinit();
var parsed = try parseFromTokenSource(Value, testing.allocator, &reader, .{});
defer parsed.deinit();
@@ -391,6 +387,6 @@ test "negative zero" {
try testing.expect(std.math.isNegativeZero(parsed.value.float));
}
-fn smallBufferJsonReader(allocator: Allocator, io_reader: anytype) JsonReader(16, @TypeOf(io_reader)) {
- return JsonReader(16, @TypeOf(io_reader)).init(allocator, io_reader);
+fn smallBufferJsonReader(allocator: Allocator, io_reader: anytype) Scanner.Reader {
+ return .init(allocator, io_reader);
}
lib/std/json/fmt.zig
@@ -1,40 +0,0 @@
-const std = @import("../std.zig");
-const assert = std.debug.assert;
-
-const stringify = @import("stringify.zig").stringify;
-const StringifyOptions = @import("stringify.zig").StringifyOptions;
-
-/// Returns a formatter that formats the given value using stringify.
-pub fn fmt(value: anytype, options: StringifyOptions) Formatter(@TypeOf(value)) {
- return Formatter(@TypeOf(value)){ .value = value, .options = options };
-}
-
-/// Formats the given value using stringify.
-pub fn Formatter(comptime T: type) type {
- return struct {
- value: T,
- options: StringifyOptions,
-
- pub fn format(self: @This(), writer: *std.io.Writer) std.io.Writer.Error!void {
- try stringify(self.value, self.options, writer);
- }
- };
-}
-
-test fmt {
- const expectFmt = std.testing.expectFmt;
- try expectFmt("123", "{}", .{fmt(@as(u32, 123), .{})});
- try expectFmt(
- \\{"num":927,"msg":"hello","sub":{"mybool":true}}
- , "{}", .{fmt(struct {
- num: u32,
- msg: []const u8,
- sub: struct {
- mybool: bool,
- },
- }{
- .num = 927,
- .msg = "hello",
- .sub = .{ .mybool = true },
- }, .{})});
-}
lib/std/json/hashmap_test.zig
@@ -1,4 +1,5 @@
const std = @import("std");
+const json = std.json;
const testing = std.testing;
const ArrayHashMap = @import("hashmap.zig").ArrayHashMap;
@@ -7,10 +8,9 @@ const parseFromSlice = @import("static.zig").parseFromSlice;
const parseFromSliceLeaky = @import("static.zig").parseFromSliceLeaky;
const parseFromTokenSource = @import("static.zig").parseFromTokenSource;
const parseFromValue = @import("static.zig").parseFromValue;
-const stringifyAlloc = @import("stringify.zig").stringifyAlloc;
const Value = @import("dynamic.zig").Value;
-const jsonReader = @import("./scanner.zig").reader;
+const Scanner = @import("Scanner.zig");
const T = struct {
i: i32,
@@ -39,8 +39,8 @@ test "parse json hashmap while streaming" {
\\ "xyz": {"i": 1, "s": "w"}
\\}
;
- var stream = std.io.fixedBufferStream(doc);
- var json_reader = jsonReader(testing.allocator, stream.reader());
+ var stream: std.Io.Reader = .fixed(doc);
+ var json_reader: Scanner.Reader = .init(testing.allocator, &stream);
var parsed = try parseFromTokenSource(
ArrayHashMap(T),
@@ -89,7 +89,7 @@ test "stringify json hashmap" {
var value = ArrayHashMap(T){};
defer value.deinit(testing.allocator);
{
- const doc = try stringifyAlloc(testing.allocator, value, .{});
+ const doc = try json.Stringify.valueAlloc(testing.allocator, value, .{});
defer testing.allocator.free(doc);
try testing.expectEqualStrings("{}", doc);
}
@@ -98,7 +98,7 @@ test "stringify json hashmap" {
try value.map.put(testing.allocator, "xyz", .{ .i = 1, .s = "w" });
{
- const doc = try stringifyAlloc(testing.allocator, value, .{});
+ const doc = try json.Stringify.valueAlloc(testing.allocator, value, .{});
defer testing.allocator.free(doc);
try testing.expectEqualStrings(
\\{"abc":{"i":0,"s":"d"},"xyz":{"i":1,"s":"w"}}
@@ -107,7 +107,7 @@ test "stringify json hashmap" {
try testing.expect(value.map.swapRemove("abc"));
{
- const doc = try stringifyAlloc(testing.allocator, value, .{});
+ const doc = try json.Stringify.valueAlloc(testing.allocator, value, .{});
defer testing.allocator.free(doc);
try testing.expectEqualStrings(
\\{"xyz":{"i":1,"s":"w"}}
@@ -116,7 +116,7 @@ test "stringify json hashmap" {
try testing.expect(value.map.swapRemove("xyz"));
{
- const doc = try stringifyAlloc(testing.allocator, value, .{});
+ const doc = try json.Stringify.valueAlloc(testing.allocator, value, .{});
defer testing.allocator.free(doc);
try testing.expectEqualStrings("{}", doc);
}
@@ -129,7 +129,7 @@ test "stringify json hashmap whitespace" {
try value.map.put(testing.allocator, "xyz", .{ .i = 1, .s = "w" });
{
- const doc = try stringifyAlloc(testing.allocator, value, .{ .whitespace = .indent_2 });
+ const doc = try json.Stringify.valueAlloc(testing.allocator, value, .{ .whitespace = .indent_2 });
defer testing.allocator.free(doc);
try testing.expectEqualStrings(
\\{
lib/std/json/scanner.zig
@@ -1,1776 +0,0 @@
-// Notes on standards compliance: https://datatracker.ietf.org/doc/html/rfc8259
-// * RFC 8259 requires JSON documents be valid UTF-8,
-// but makes an allowance for systems that are "part of a closed ecosystem".
-// I have no idea what that's supposed to mean in the context of a standard specification.
-// This implementation requires inputs to be valid UTF-8.
-// * RFC 8259 contradicts itself regarding whether lowercase is allowed in \u hex digits,
-// but this is probably a bug in the spec, and it's clear that lowercase is meant to be allowed.
-// (RFC 5234 defines HEXDIG to only allow uppercase.)
-// * When RFC 8259 refers to a "character", I assume they really mean a "Unicode scalar value".
-// See http://www.unicode.org/glossary/#unicode_scalar_value .
-// * RFC 8259 doesn't explicitly disallow unpaired surrogate halves in \u escape sequences,
-// but vaguely implies that \u escapes are for encoding Unicode "characters" (i.e. Unicode scalar values?),
-// which would mean that unpaired surrogate halves are forbidden.
-// By contrast ECMA-404 (a competing(/compatible?) JSON standard, which JavaScript's JSON.parse() conforms to)
-// explicitly allows unpaired surrogate halves.
-// This implementation forbids unpaired surrogate halves in \u sequences.
-// If a high surrogate half appears in a \u sequence,
-// then a low surrogate half must immediately follow in \u notation.
-// * RFC 8259 allows implementations to "accept non-JSON forms or extensions".
-// This implementation does not accept any of that.
-// * RFC 8259 allows implementations to put limits on "the size of texts",
-// "the maximum depth of nesting", "the range and precision of numbers",
-// and "the length and character contents of strings".
-// This low-level implementation does not limit these,
-// except where noted above, and except that nesting depth requires memory allocation.
-// Note that this low-level API does not interpret numbers numerically,
-// but simply emits their source form for some higher level code to make sense of.
-// * This low-level implementation allows duplicate object keys,
-// and key/value pairs are emitted in the order they appear in the input.
-
-const std = @import("std");
-
-const Allocator = std.mem.Allocator;
-const ArrayList = std.ArrayList;
-const assert = std.debug.assert;
-const BitStack = std.BitStack;
-
-/// Scan the input and check for malformed JSON.
-/// On `SyntaxError` or `UnexpectedEndOfInput`, returns `false`.
-/// Returns any errors from the allocator as-is, which is unlikely,
-/// but can be caused by extreme nesting depth in the input.
-pub fn validate(allocator: Allocator, s: []const u8) Allocator.Error!bool {
- var scanner = Scanner.initCompleteInput(allocator, s);
- defer scanner.deinit();
-
- while (true) {
- const token = scanner.next() catch |err| switch (err) {
- error.SyntaxError, error.UnexpectedEndOfInput => return false,
- error.OutOfMemory => return error.OutOfMemory,
- error.BufferUnderrun => unreachable,
- };
- if (token == .end_of_document) break;
- }
-
- return true;
-}
-
-/// The parsing errors are divided into two categories:
-/// * `SyntaxError` is for clearly malformed JSON documents,
-/// such as giving an input document that isn't JSON at all.
-/// * `UnexpectedEndOfInput` is for signaling that everything's been
-/// valid so far, but the input appears to be truncated for some reason.
-/// Note that a completely empty (or whitespace-only) input will give `UnexpectedEndOfInput`.
-pub const Error = error{ SyntaxError, UnexpectedEndOfInput };
-
-/// Calls `std.json.Reader` with `std.json.default_buffer_size`.
-pub fn reader(allocator: Allocator, io_reader: anytype) Reader(default_buffer_size, @TypeOf(io_reader)) {
- return Reader(default_buffer_size, @TypeOf(io_reader)).init(allocator, io_reader);
-}
-/// Used by `json.reader`.
-pub const default_buffer_size = 0x1000;
-
-/// The tokens emitted by `std.json.Scanner` and `std.json.Reader` `.next*()` functions follow this grammar:
-/// ```
-/// <document> = <value> .end_of_document
-/// <value> =
-/// | <object>
-/// | <array>
-/// | <number>
-/// | <string>
-/// | .true
-/// | .false
-/// | .null
-/// <object> = .object_begin ( <string> <value> )* .object_end
-/// <array> = .array_begin ( <value> )* .array_end
-/// <number> = <It depends. See below.>
-/// <string> = <It depends. See below.>
-/// ```
-///
-/// What you get for `<number>` and `<string>` values depends on which `next*()` method you call:
-///
-/// ```
-/// next():
-/// <number> = ( .partial_number )* .number
-/// <string> = ( <partial_string> )* .string
-/// <partial_string> =
-/// | .partial_string
-/// | .partial_string_escaped_1
-/// | .partial_string_escaped_2
-/// | .partial_string_escaped_3
-/// | .partial_string_escaped_4
-///
-/// nextAlloc*(..., .alloc_always):
-/// <number> = .allocated_number
-/// <string> = .allocated_string
-///
-/// nextAlloc*(..., .alloc_if_needed):
-/// <number> =
-/// | .number
-/// | .allocated_number
-/// <string> =
-/// | .string
-/// | .allocated_string
-/// ```
-///
-/// For all tokens with a `[]const u8`, `[]u8`, or `[n]u8` payload, the payload represents the content of the value.
-/// For number values, this is the representation of the number exactly as it appears in the input.
-/// For strings, this is the content of the string after resolving escape sequences.
-///
-/// For `.allocated_number` and `.allocated_string`, the `[]u8` payloads are allocations made with the given allocator.
-/// You are responsible for managing that memory. `json.Reader.deinit()` does *not* free those allocations.
-///
-/// The `.partial_*` tokens indicate that a value spans multiple input buffers or that a string contains escape sequences.
-/// To get a complete value in memory, you need to concatenate the values yourself.
-/// Calling `nextAlloc*()` does this for you, and returns an `.allocated_*` token with the result.
-///
-/// For tokens with a `[]const u8` payload, the payload is a slice into the current input buffer.
-/// The memory may become undefined during the next call to `json.Scanner.feedInput()`
-/// or any `json.Reader` method whose return error set includes `json.Error`.
-/// To keep the value persistently, it recommended to make a copy or to use `.alloc_always`,
-/// which makes a copy for you.
-///
-/// Note that `.number` and `.string` tokens that follow `.partial_*` tokens may have `0` length to indicate that
-/// the previously partial value is completed with no additional bytes.
-/// (This can happen when the break between input buffers happens to land on the exact end of a value. E.g. `"[1234"`, `"]"`.)
-/// `.partial_*` tokens never have `0` length.
-///
-/// The recommended strategy for using the different `next*()` methods is something like this:
-///
-/// When you're expecting an object key, use `.alloc_if_needed`.
-/// You often don't need a copy of the key string to persist; you might just check which field it is.
-/// In the case that the key happens to require an allocation, free it immediately after checking it.
-///
-/// When you're expecting a meaningful string value (such as on the right of a `:`),
-/// use `.alloc_always` in order to keep the value valid throughout parsing the rest of the document.
-///
-/// When you're expecting a number value, use `.alloc_if_needed`.
-/// You're probably going to be parsing the string representation of the number into a numeric representation,
-/// so you need the complete string representation only temporarily.
-///
-/// When you're skipping an unrecognized value, use `skipValue()`.
-pub const Token = union(enum) {
- object_begin,
- object_end,
- array_begin,
- array_end,
-
- true,
- false,
- null,
-
- number: []const u8,
- partial_number: []const u8,
- allocated_number: []u8,
-
- string: []const u8,
- partial_string: []const u8,
- partial_string_escaped_1: [1]u8,
- partial_string_escaped_2: [2]u8,
- partial_string_escaped_3: [3]u8,
- partial_string_escaped_4: [4]u8,
- allocated_string: []u8,
-
- end_of_document,
-};
-
-/// This is only used in `peekNextTokenType()` and gives a categorization based on the first byte of the next token that will be emitted from a `next*()` call.
-pub const TokenType = enum {
- object_begin,
- object_end,
- array_begin,
- array_end,
- true,
- false,
- null,
- number,
- string,
- end_of_document,
-};
-
-/// To enable diagnostics, declare `var diagnostics = Diagnostics{};` then call `source.enableDiagnostics(&diagnostics);`
-/// where `source` is either a `std.json.Reader` or a `std.json.Scanner` that has just been initialized.
-/// At any time, notably just after an error, call `getLine()`, `getColumn()`, and/or `getByteOffset()`
-/// to get meaningful information from this.
-pub const Diagnostics = struct {
- line_number: u64 = 1,
- line_start_cursor: usize = @as(usize, @bitCast(@as(isize, -1))), // Start just "before" the input buffer to get a 1-based column for line 1.
- total_bytes_before_current_input: u64 = 0,
- cursor_pointer: *const usize = undefined,
-
- /// Starts at 1.
- pub fn getLine(self: *const @This()) u64 {
- return self.line_number;
- }
- /// Starts at 1.
- pub fn getColumn(self: *const @This()) u64 {
- return self.cursor_pointer.* -% self.line_start_cursor;
- }
- /// Starts at 0. Measures the byte offset since the start of the input.
- pub fn getByteOffset(self: *const @This()) u64 {
- return self.total_bytes_before_current_input + self.cursor_pointer.*;
- }
-};
-
-/// See the documentation for `std.json.Token`.
-pub const AllocWhen = enum { alloc_if_needed, alloc_always };
-
-/// For security, the maximum size allocated to store a single string or number value is limited to 4MiB by default.
-/// This limit can be specified by calling `nextAllocMax()` instead of `nextAlloc()`.
-pub const default_max_value_len = 4 * 1024 * 1024;
-
-/// Connects a `std.io.GenericReader` to a `std.json.Scanner`.
-/// All `next*()` methods here handle `error.BufferUnderrun` from `std.json.Scanner`, and then read from the reader.
-pub fn Reader(comptime buffer_size: usize, comptime ReaderType: type) type {
- return struct {
- scanner: Scanner,
- reader: ReaderType,
-
- buffer: [buffer_size]u8 = undefined,
-
- /// The allocator is only used to track `[]` and `{}` nesting levels.
- pub fn init(allocator: Allocator, io_reader: ReaderType) @This() {
- return .{
- .scanner = Scanner.initStreaming(allocator),
- .reader = io_reader,
- };
- }
- pub fn deinit(self: *@This()) void {
- self.scanner.deinit();
- self.* = undefined;
- }
-
- /// Calls `std.json.Scanner.enableDiagnostics`.
- pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void {
- self.scanner.enableDiagnostics(diagnostics);
- }
-
- pub const NextError = ReaderType.Error || Error || Allocator.Error;
- pub const SkipError = NextError;
- pub const AllocError = NextError || error{ValueTooLong};
- pub const PeekError = ReaderType.Error || Error;
-
- /// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);`
- /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
- pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) AllocError!Token {
- return self.nextAllocMax(allocator, when, default_max_value_len);
- }
- /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
- pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token {
- const token_type = try self.peekNextTokenType();
- switch (token_type) {
- .number, .string => {
- var value_list = ArrayList(u8).init(allocator);
- errdefer {
- value_list.deinit();
- }
- if (try self.allocNextIntoArrayListMax(&value_list, when, max_value_len)) |slice| {
- return if (token_type == .number)
- Token{ .number = slice }
- else
- Token{ .string = slice };
- } else {
- return if (token_type == .number)
- Token{ .allocated_number = try value_list.toOwnedSlice() }
- else
- Token{ .allocated_string = try value_list.toOwnedSlice() };
- }
- },
-
- // Simple tokens never alloc.
- .object_begin,
- .object_end,
- .array_begin,
- .array_end,
- .true,
- .false,
- .null,
- .end_of_document,
- => return try self.next(),
- }
- }
-
- /// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);`
- pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) AllocError!?[]const u8 {
- return self.allocNextIntoArrayListMax(value_list, when, default_max_value_len);
- }
- /// Calls `std.json.Scanner.allocNextIntoArrayListMax` and handles `error.BufferUnderrun`.
- pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocError!?[]const u8 {
- while (true) {
- return self.scanner.allocNextIntoArrayListMax(value_list, when, max_value_len) catch |err| switch (err) {
- error.BufferUnderrun => {
- try self.refillBuffer();
- continue;
- },
- else => |other_err| return other_err,
- };
- }
- }
-
- /// Like `std.json.Scanner.skipValue`, but handles `error.BufferUnderrun`.
- pub fn skipValue(self: *@This()) SkipError!void {
- switch (try self.peekNextTokenType()) {
- .object_begin, .array_begin => {
- try self.skipUntilStackHeight(self.stackHeight());
- },
- .number, .string => {
- while (true) {
- switch (try self.next()) {
- .partial_number,
- .partial_string,
- .partial_string_escaped_1,
- .partial_string_escaped_2,
- .partial_string_escaped_3,
- .partial_string_escaped_4,
- => continue,
-
- .number, .string => break,
-
- else => unreachable,
- }
- }
- },
- .true, .false, .null => {
- _ = try self.next();
- },
-
- .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token.
- }
- }
- /// Like `std.json.Scanner.skipUntilStackHeight()` but handles `error.BufferUnderrun`.
- pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: usize) NextError!void {
- while (true) {
- return self.scanner.skipUntilStackHeight(terminal_stack_height) catch |err| switch (err) {
- error.BufferUnderrun => {
- try self.refillBuffer();
- continue;
- },
- else => |other_err| return other_err,
- };
- }
- }
-
- /// Calls `std.json.Scanner.stackHeight`.
- pub fn stackHeight(self: *const @This()) usize {
- return self.scanner.stackHeight();
- }
- /// Calls `std.json.Scanner.ensureTotalStackCapacity`.
- pub fn ensureTotalStackCapacity(self: *@This(), height: usize) Allocator.Error!void {
- try self.scanner.ensureTotalStackCapacity(height);
- }
-
- /// See `std.json.Token` for documentation of this function.
- pub fn next(self: *@This()) NextError!Token {
- while (true) {
- return self.scanner.next() catch |err| switch (err) {
- error.BufferUnderrun => {
- try self.refillBuffer();
- continue;
- },
- else => |other_err| return other_err,
- };
- }
- }
-
- /// See `std.json.Scanner.peekNextTokenType()`.
- pub fn peekNextTokenType(self: *@This()) PeekError!TokenType {
- while (true) {
- return self.scanner.peekNextTokenType() catch |err| switch (err) {
- error.BufferUnderrun => {
- try self.refillBuffer();
- continue;
- },
- else => |other_err| return other_err,
- };
- }
- }
-
- fn refillBuffer(self: *@This()) ReaderType.Error!void {
- const input = self.buffer[0..try self.reader.read(self.buffer[0..])];
- if (input.len > 0) {
- self.scanner.feedInput(input);
- } else {
- self.scanner.endInput();
- }
- }
- };
-}
-
-/// The lowest level parsing API in this package;
-/// supports streaming input with a low memory footprint.
-/// The memory requirement is `O(d)` where d is the nesting depth of `[]` or `{}` containers in the input.
-/// Specifically `d/8` bytes are required for this purpose,
-/// with some extra buffer according to the implementation of `std.ArrayList`.
-///
-/// This scanner can emit partial tokens; see `std.json.Token`.
-/// The input to this class is a sequence of input buffers that you must supply one at a time.
-/// Call `feedInput()` with the first buffer, then call `next()` repeatedly until `error.BufferUnderrun` is returned.
-/// Then call `feedInput()` again and so forth.
-/// Call `endInput()` when the last input buffer has been given to `feedInput()`, either immediately after calling `feedInput()`,
-/// or when `error.BufferUnderrun` requests more data and there is no more.
-/// Be sure to call `next()` after calling `endInput()` until `Token.end_of_document` has been returned.
-pub const Scanner = struct {
- state: State = .value,
- string_is_object_key: bool = false,
- stack: BitStack,
- value_start: usize = undefined,
- utf16_code_units: [2]u16 = undefined,
-
- input: []const u8 = "",
- cursor: usize = 0,
- is_end_of_input: bool = false,
- diagnostics: ?*Diagnostics = null,
-
- /// The allocator is only used to track `[]` and `{}` nesting levels.
- pub fn initStreaming(allocator: Allocator) @This() {
- return .{
- .stack = BitStack.init(allocator),
- };
- }
- /// Use this if your input is a single slice.
- /// This is effectively equivalent to:
- /// ```
- /// initStreaming(allocator);
- /// feedInput(complete_input);
- /// endInput();
- /// ```
- pub fn initCompleteInput(allocator: Allocator, complete_input: []const u8) @This() {
- return .{
- .stack = BitStack.init(allocator),
- .input = complete_input,
- .is_end_of_input = true,
- };
- }
- pub fn deinit(self: *@This()) void {
- self.stack.deinit();
- self.* = undefined;
- }
-
- pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void {
- diagnostics.cursor_pointer = &self.cursor;
- self.diagnostics = diagnostics;
- }
-
- /// Call this whenever you get `error.BufferUnderrun` from `next()`.
- /// When there is no more input to provide, call `endInput()`.
- pub fn feedInput(self: *@This(), input: []const u8) void {
- assert(self.cursor == self.input.len); // Not done with the last input slice.
- if (self.diagnostics) |diag| {
- diag.total_bytes_before_current_input += self.input.len;
- // This usually goes "negative" to measure how far before the beginning
- // of the new buffer the current line started.
- diag.line_start_cursor -%= self.cursor;
- }
- self.input = input;
- self.cursor = 0;
- self.value_start = 0;
- }
- /// Call this when you will no longer call `feedInput()` anymore.
- /// This can be called either immediately after the last `feedInput()`,
- /// or at any time afterward, such as when getting `error.BufferUnderrun` from `next()`.
- /// Don't forget to call `next*()` after `endInput()` until you get `.end_of_document`.
- pub fn endInput(self: *@This()) void {
- self.is_end_of_input = true;
- }
-
- pub const NextError = Error || Allocator.Error || error{BufferUnderrun};
- pub const AllocError = Error || Allocator.Error || error{ValueTooLong};
- pub const PeekError = Error || error{BufferUnderrun};
- pub const SkipError = Error || Allocator.Error;
- pub const AllocIntoArrayListError = AllocError || error{BufferUnderrun};
-
- /// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);`
- /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
- /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
- pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) AllocError!Token {
- return self.nextAllocMax(allocator, when, default_max_value_len);
- }
-
- /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
- /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
- pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token {
- assert(self.is_end_of_input); // This function is not available in streaming mode.
- const token_type = self.peekNextTokenType() catch |e| switch (e) {
- error.BufferUnderrun => unreachable,
- else => |err| return err,
- };
- switch (token_type) {
- .number, .string => {
- var value_list = ArrayList(u8).init(allocator);
- errdefer {
- value_list.deinit();
- }
- if (self.allocNextIntoArrayListMax(&value_list, when, max_value_len) catch |e| switch (e) {
- error.BufferUnderrun => unreachable,
- else => |err| return err,
- }) |slice| {
- return if (token_type == .number)
- Token{ .number = slice }
- else
- Token{ .string = slice };
- } else {
- return if (token_type == .number)
- Token{ .allocated_number = try value_list.toOwnedSlice() }
- else
- Token{ .allocated_string = try value_list.toOwnedSlice() };
- }
- },
-
- // Simple tokens never alloc.
- .object_begin,
- .object_end,
- .array_begin,
- .array_end,
- .true,
- .false,
- .null,
- .end_of_document,
- => return self.next() catch |e| switch (e) {
- error.BufferUnderrun => unreachable,
- else => |err| return err,
- },
- }
- }
-
- /// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);`
- pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) AllocIntoArrayListError!?[]const u8 {
- return self.allocNextIntoArrayListMax(value_list, when, default_max_value_len);
- }
- /// The next token type must be either `.number` or `.string`. See `peekNextTokenType()`.
- /// When allocation is not necessary with `.alloc_if_needed`,
- /// this method returns the content slice from the input buffer, and `value_list` is not touched.
- /// When allocation is necessary or with `.alloc_always`, this method concatenates partial tokens into the given `value_list`,
- /// and returns `null` once the final `.number` or `.string` token has been written into it.
- /// In case of an `error.BufferUnderrun`, partial values will be left in the given value_list.
- /// The given `value_list` is never reset by this method, so an `error.BufferUnderrun` situation
- /// can be resumed by passing the same array list in again.
- /// This method does not indicate whether the token content being returned is for a `.number` or `.string` token type;
- /// the caller of this method is expected to know which type of token is being processed.
- pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocIntoArrayListError!?[]const u8 {
- while (true) {
- const token = try self.next();
- switch (token) {
- // Accumulate partial values.
- .partial_number, .partial_string => |slice| {
- try appendSlice(value_list, slice, max_value_len);
- },
- .partial_string_escaped_1 => |buf| {
- try appendSlice(value_list, buf[0..], max_value_len);
- },
- .partial_string_escaped_2 => |buf| {
- try appendSlice(value_list, buf[0..], max_value_len);
- },
- .partial_string_escaped_3 => |buf| {
- try appendSlice(value_list, buf[0..], max_value_len);
- },
- .partial_string_escaped_4 => |buf| {
- try appendSlice(value_list, buf[0..], max_value_len);
- },
-
- // Return complete values.
- .number => |slice| {
- if (when == .alloc_if_needed and value_list.items.len == 0) {
- // No alloc necessary.
- return slice;
- }
- try appendSlice(value_list, slice, max_value_len);
- // The token is complete.
- return null;
- },
- .string => |slice| {
- if (when == .alloc_if_needed and value_list.items.len == 0) {
- // No alloc necessary.
- return slice;
- }
- try appendSlice(value_list, slice, max_value_len);
- // The token is complete.
- return null;
- },
-
- .object_begin,
- .object_end,
- .array_begin,
- .array_end,
- .true,
- .false,
- .null,
- .end_of_document,
- => unreachable, // Only .number and .string token types are allowed here. Check peekNextTokenType() before calling this.
-
- .allocated_number, .allocated_string => unreachable,
- }
- }
- }
-
- /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
- /// If the next token type is `.object_begin` or `.array_begin`,
- /// this function calls `next()` repeatedly until the corresponding `.object_end` or `.array_end` is found.
- /// If the next token type is `.number` or `.string`,
- /// this function calls `next()` repeatedly until the (non `.partial_*`) `.number` or `.string` token is found.
- /// If the next token type is `.true`, `.false`, or `.null`, this function calls `next()` once.
- /// The next token type must not be `.object_end`, `.array_end`, or `.end_of_document`;
- /// see `peekNextTokenType()`.
- pub fn skipValue(self: *@This()) SkipError!void {
- assert(self.is_end_of_input); // This function is not available in streaming mode.
- switch (self.peekNextTokenType() catch |e| switch (e) {
- error.BufferUnderrun => unreachable,
- else => |err| return err,
- }) {
- .object_begin, .array_begin => {
- self.skipUntilStackHeight(self.stackHeight()) catch |e| switch (e) {
- error.BufferUnderrun => unreachable,
- else => |err| return err,
- };
- },
- .number, .string => {
- while (true) {
- switch (self.next() catch |e| switch (e) {
- error.BufferUnderrun => unreachable,
- else => |err| return err,
- }) {
- .partial_number,
- .partial_string,
- .partial_string_escaped_1,
- .partial_string_escaped_2,
- .partial_string_escaped_3,
- .partial_string_escaped_4,
- => continue,
-
- .number, .string => break,
-
- else => unreachable,
- }
- }
- },
- .true, .false, .null => {
- _ = self.next() catch |e| switch (e) {
- error.BufferUnderrun => unreachable,
- else => |err| return err,
- };
- },
-
- .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token.
- }
- }
-
- /// Skip tokens until an `.object_end` or `.array_end` token results in a `stackHeight()` equal the given stack height.
- /// Unlike `skipValue()`, this function is available in streaming mode.
- pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: usize) NextError!void {
- while (true) {
- switch (try self.next()) {
- .object_end, .array_end => {
- if (self.stackHeight() == terminal_stack_height) break;
- },
- .end_of_document => unreachable,
- else => continue,
- }
- }
- }
-
- /// The depth of `{}` or `[]` nesting levels at the current position.
- pub fn stackHeight(self: *const @This()) usize {
- return self.stack.bit_len;
- }
-
- /// Pre allocate memory to hold the given number of nesting levels.
- /// `stackHeight()` up to the given number will not cause allocations.
- pub fn ensureTotalStackCapacity(self: *@This(), height: usize) Allocator.Error!void {
- try self.stack.ensureTotalCapacity(height);
- }
-
- /// See `std.json.Token` for documentation of this function.
- pub fn next(self: *@This()) NextError!Token {
- state_loop: while (true) {
- switch (self.state) {
- .value => {
- switch (try self.skipWhitespaceExpectByte()) {
- // Object, Array
- '{' => {
- try self.stack.push(OBJECT_MODE);
- self.cursor += 1;
- self.state = .object_start;
- return .object_begin;
- },
- '[' => {
- try self.stack.push(ARRAY_MODE);
- self.cursor += 1;
- self.state = .array_start;
- return .array_begin;
- },
-
- // String
- '"' => {
- self.cursor += 1;
- self.value_start = self.cursor;
- self.state = .string;
- continue :state_loop;
- },
-
- // Number
- '1'...'9' => {
- self.value_start = self.cursor;
- self.cursor += 1;
- self.state = .number_int;
- continue :state_loop;
- },
- '0' => {
- self.value_start = self.cursor;
- self.cursor += 1;
- self.state = .number_leading_zero;
- continue :state_loop;
- },
- '-' => {
- self.value_start = self.cursor;
- self.cursor += 1;
- self.state = .number_minus;
- continue :state_loop;
- },
-
- // literal values
- 't' => {
- self.cursor += 1;
- self.state = .literal_t;
- continue :state_loop;
- },
- 'f' => {
- self.cursor += 1;
- self.state = .literal_f;
- continue :state_loop;
- },
- 'n' => {
- self.cursor += 1;
- self.state = .literal_n;
- continue :state_loop;
- },
-
- else => return error.SyntaxError,
- }
- },
-
- .post_value => {
- if (try self.skipWhitespaceCheckEnd()) return .end_of_document;
-
- const c = self.input[self.cursor];
- if (self.string_is_object_key) {
- self.string_is_object_key = false;
- switch (c) {
- ':' => {
- self.cursor += 1;
- self.state = .value;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- }
-
- switch (c) {
- '}' => {
- if (self.stack.pop() != OBJECT_MODE) return error.SyntaxError;
- self.cursor += 1;
- // stay in .post_value state.
- return .object_end;
- },
- ']' => {
- if (self.stack.pop() != ARRAY_MODE) return error.SyntaxError;
- self.cursor += 1;
- // stay in .post_value state.
- return .array_end;
- },
- ',' => {
- switch (self.stack.peek()) {
- OBJECT_MODE => {
- self.state = .object_post_comma;
- },
- ARRAY_MODE => {
- self.state = .value;
- },
- }
- self.cursor += 1;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- },
-
- .object_start => {
- switch (try self.skipWhitespaceExpectByte()) {
- '"' => {
- self.cursor += 1;
- self.value_start = self.cursor;
- self.state = .string;
- self.string_is_object_key = true;
- continue :state_loop;
- },
- '}' => {
- self.cursor += 1;
- _ = self.stack.pop();
- self.state = .post_value;
- return .object_end;
- },
- else => return error.SyntaxError,
- }
- },
- .object_post_comma => {
- switch (try self.skipWhitespaceExpectByte()) {
- '"' => {
- self.cursor += 1;
- self.value_start = self.cursor;
- self.state = .string;
- self.string_is_object_key = true;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- },
-
- .array_start => {
- switch (try self.skipWhitespaceExpectByte()) {
- ']' => {
- self.cursor += 1;
- _ = self.stack.pop();
- self.state = .post_value;
- return .array_end;
- },
- else => {
- self.state = .value;
- continue :state_loop;
- },
- }
- },
-
- .number_minus => {
- if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
- switch (self.input[self.cursor]) {
- '0' => {
- self.cursor += 1;
- self.state = .number_leading_zero;
- continue :state_loop;
- },
- '1'...'9' => {
- self.cursor += 1;
- self.state = .number_int;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- },
- .number_leading_zero => {
- if (self.cursor >= self.input.len) return self.endOfBufferInNumber(true);
- switch (self.input[self.cursor]) {
- '.' => {
- self.cursor += 1;
- self.state = .number_post_dot;
- continue :state_loop;
- },
- 'e', 'E' => {
- self.cursor += 1;
- self.state = .number_post_e;
- continue :state_loop;
- },
- else => {
- self.state = .post_value;
- return Token{ .number = self.takeValueSlice() };
- },
- }
- },
- .number_int => {
- while (self.cursor < self.input.len) : (self.cursor += 1) {
- switch (self.input[self.cursor]) {
- '0'...'9' => continue,
- '.' => {
- self.cursor += 1;
- self.state = .number_post_dot;
- continue :state_loop;
- },
- 'e', 'E' => {
- self.cursor += 1;
- self.state = .number_post_e;
- continue :state_loop;
- },
- else => {
- self.state = .post_value;
- return Token{ .number = self.takeValueSlice() };
- },
- }
- }
- return self.endOfBufferInNumber(true);
- },
- .number_post_dot => {
- if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
- switch (self.input[self.cursor]) {
- '0'...'9' => {
- self.cursor += 1;
- self.state = .number_frac;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- },
- .number_frac => {
- while (self.cursor < self.input.len) : (self.cursor += 1) {
- switch (self.input[self.cursor]) {
- '0'...'9' => continue,
- 'e', 'E' => {
- self.cursor += 1;
- self.state = .number_post_e;
- continue :state_loop;
- },
- else => {
- self.state = .post_value;
- return Token{ .number = self.takeValueSlice() };
- },
- }
- }
- return self.endOfBufferInNumber(true);
- },
- .number_post_e => {
- if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
- switch (self.input[self.cursor]) {
- '0'...'9' => {
- self.cursor += 1;
- self.state = .number_exp;
- continue :state_loop;
- },
- '+', '-' => {
- self.cursor += 1;
- self.state = .number_post_e_sign;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- },
- .number_post_e_sign => {
- if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
- switch (self.input[self.cursor]) {
- '0'...'9' => {
- self.cursor += 1;
- self.state = .number_exp;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- },
- .number_exp => {
- while (self.cursor < self.input.len) : (self.cursor += 1) {
- switch (self.input[self.cursor]) {
- '0'...'9' => continue,
- else => {
- self.state = .post_value;
- return Token{ .number = self.takeValueSlice() };
- },
- }
- }
- return self.endOfBufferInNumber(true);
- },
-
- .string => {
- while (self.cursor < self.input.len) : (self.cursor += 1) {
- switch (self.input[self.cursor]) {
- 0...0x1f => return error.SyntaxError, // Bare ASCII control code in string.
-
- // ASCII plain text.
- 0x20...('"' - 1), ('"' + 1)...('\\' - 1), ('\\' + 1)...0x7F => continue,
-
- // Special characters.
- '"' => {
- const result = Token{ .string = self.takeValueSlice() };
- self.cursor += 1;
- self.state = .post_value;
- return result;
- },
- '\\' => {
- const slice = self.takeValueSlice();
- self.cursor += 1;
- self.state = .string_backslash;
- if (slice.len > 0) return Token{ .partial_string = slice };
- continue :state_loop;
- },
-
- // UTF-8 validation.
- // See http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String
- 0xC2...0xDF => {
- self.cursor += 1;
- self.state = .string_utf8_last_byte;
- continue :state_loop;
- },
- 0xE0 => {
- self.cursor += 1;
- self.state = .string_utf8_second_to_last_byte_guard_against_overlong;
- continue :state_loop;
- },
- 0xE1...0xEC, 0xEE...0xEF => {
- self.cursor += 1;
- self.state = .string_utf8_second_to_last_byte;
- continue :state_loop;
- },
- 0xED => {
- self.cursor += 1;
- self.state = .string_utf8_second_to_last_byte_guard_against_surrogate_half;
- continue :state_loop;
- },
- 0xF0 => {
- self.cursor += 1;
- self.state = .string_utf8_third_to_last_byte_guard_against_overlong;
- continue :state_loop;
- },
- 0xF1...0xF3 => {
- self.cursor += 1;
- self.state = .string_utf8_third_to_last_byte;
- continue :state_loop;
- },
- 0xF4 => {
- self.cursor += 1;
- self.state = .string_utf8_third_to_last_byte_guard_against_too_large;
- continue :state_loop;
- },
- 0x80...0xC1, 0xF5...0xFF => return error.SyntaxError, // Invalid UTF-8.
- }
- }
- if (self.is_end_of_input) return error.UnexpectedEndOfInput;
- const slice = self.takeValueSlice();
- if (slice.len > 0) return Token{ .partial_string = slice };
- return error.BufferUnderrun;
- },
- .string_backslash => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- switch (self.input[self.cursor]) {
- '"', '\\', '/' => {
- // Since these characters now represent themselves literally,
- // we can simply begin the next plaintext slice here.
- self.value_start = self.cursor;
- self.cursor += 1;
- self.state = .string;
- continue :state_loop;
- },
- 'b' => {
- self.cursor += 1;
- self.value_start = self.cursor;
- self.state = .string;
- return Token{ .partial_string_escaped_1 = [_]u8{0x08} };
- },
- 'f' => {
- self.cursor += 1;
- self.value_start = self.cursor;
- self.state = .string;
- return Token{ .partial_string_escaped_1 = [_]u8{0x0c} };
- },
- 'n' => {
- self.cursor += 1;
- self.value_start = self.cursor;
- self.state = .string;
- return Token{ .partial_string_escaped_1 = [_]u8{'\n'} };
- },
- 'r' => {
- self.cursor += 1;
- self.value_start = self.cursor;
- self.state = .string;
- return Token{ .partial_string_escaped_1 = [_]u8{'\r'} };
- },
- 't' => {
- self.cursor += 1;
- self.value_start = self.cursor;
- self.state = .string;
- return Token{ .partial_string_escaped_1 = [_]u8{'\t'} };
- },
- 'u' => {
- self.cursor += 1;
- self.state = .string_backslash_u;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- },
- .string_backslash_u => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- const c = self.input[self.cursor];
- switch (c) {
- '0'...'9' => {
- self.utf16_code_units[0] = @as(u16, c - '0') << 12;
- },
- 'A'...'F' => {
- self.utf16_code_units[0] = @as(u16, c - 'A' + 10) << 12;
- },
- 'a'...'f' => {
- self.utf16_code_units[0] = @as(u16, c - 'a' + 10) << 12;
- },
- else => return error.SyntaxError,
- }
- self.cursor += 1;
- self.state = .string_backslash_u_1;
- continue :state_loop;
- },
- .string_backslash_u_1 => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- const c = self.input[self.cursor];
- switch (c) {
- '0'...'9' => {
- self.utf16_code_units[0] |= @as(u16, c - '0') << 8;
- },
- 'A'...'F' => {
- self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 8;
- },
- 'a'...'f' => {
- self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 8;
- },
- else => return error.SyntaxError,
- }
- self.cursor += 1;
- self.state = .string_backslash_u_2;
- continue :state_loop;
- },
- .string_backslash_u_2 => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- const c = self.input[self.cursor];
- switch (c) {
- '0'...'9' => {
- self.utf16_code_units[0] |= @as(u16, c - '0') << 4;
- },
- 'A'...'F' => {
- self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 4;
- },
- 'a'...'f' => {
- self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 4;
- },
- else => return error.SyntaxError,
- }
- self.cursor += 1;
- self.state = .string_backslash_u_3;
- continue :state_loop;
- },
- .string_backslash_u_3 => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- const c = self.input[self.cursor];
- switch (c) {
- '0'...'9' => {
- self.utf16_code_units[0] |= c - '0';
- },
- 'A'...'F' => {
- self.utf16_code_units[0] |= c - 'A' + 10;
- },
- 'a'...'f' => {
- self.utf16_code_units[0] |= c - 'a' + 10;
- },
- else => return error.SyntaxError,
- }
- self.cursor += 1;
- if (std.unicode.utf16IsHighSurrogate(self.utf16_code_units[0])) {
- self.state = .string_surrogate_half;
- continue :state_loop;
- } else if (std.unicode.utf16IsLowSurrogate(self.utf16_code_units[0])) {
- return error.SyntaxError; // Unexpected low surrogate half.
- } else {
- self.value_start = self.cursor;
- self.state = .string;
- return partialStringCodepoint(self.utf16_code_units[0]);
- }
- },
- .string_surrogate_half => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- switch (self.input[self.cursor]) {
- '\\' => {
- self.cursor += 1;
- self.state = .string_surrogate_half_backslash;
- continue :state_loop;
- },
- else => return error.SyntaxError, // Expected low surrogate half.
- }
- },
- .string_surrogate_half_backslash => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- switch (self.input[self.cursor]) {
- 'u' => {
- self.cursor += 1;
- self.state = .string_surrogate_half_backslash_u;
- continue :state_loop;
- },
- else => return error.SyntaxError, // Expected low surrogate half.
- }
- },
- .string_surrogate_half_backslash_u => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- switch (self.input[self.cursor]) {
- 'D', 'd' => {
- self.cursor += 1;
- self.utf16_code_units[1] = 0xD << 12;
- self.state = .string_surrogate_half_backslash_u_1;
- continue :state_loop;
- },
- else => return error.SyntaxError, // Expected low surrogate half.
- }
- },
- .string_surrogate_half_backslash_u_1 => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- const c = self.input[self.cursor];
- switch (c) {
- 'C'...'F' => {
- self.cursor += 1;
- self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 8;
- self.state = .string_surrogate_half_backslash_u_2;
- continue :state_loop;
- },
- 'c'...'f' => {
- self.cursor += 1;
- self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 8;
- self.state = .string_surrogate_half_backslash_u_2;
- continue :state_loop;
- },
- else => return error.SyntaxError, // Expected low surrogate half.
- }
- },
- .string_surrogate_half_backslash_u_2 => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- const c = self.input[self.cursor];
- switch (c) {
- '0'...'9' => {
- self.cursor += 1;
- self.utf16_code_units[1] |= @as(u16, c - '0') << 4;
- self.state = .string_surrogate_half_backslash_u_3;
- continue :state_loop;
- },
- 'A'...'F' => {
- self.cursor += 1;
- self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 4;
- self.state = .string_surrogate_half_backslash_u_3;
- continue :state_loop;
- },
- 'a'...'f' => {
- self.cursor += 1;
- self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 4;
- self.state = .string_surrogate_half_backslash_u_3;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- },
- .string_surrogate_half_backslash_u_3 => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- const c = self.input[self.cursor];
- switch (c) {
- '0'...'9' => {
- self.utf16_code_units[1] |= c - '0';
- },
- 'A'...'F' => {
- self.utf16_code_units[1] |= c - 'A' + 10;
- },
- 'a'...'f' => {
- self.utf16_code_units[1] |= c - 'a' + 10;
- },
- else => return error.SyntaxError,
- }
- self.cursor += 1;
- self.value_start = self.cursor;
- self.state = .string;
- const code_point = std.unicode.utf16DecodeSurrogatePair(&self.utf16_code_units) catch unreachable;
- return partialStringCodepoint(code_point);
- },
-
- .string_utf8_last_byte => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- switch (self.input[self.cursor]) {
- 0x80...0xBF => {
- self.cursor += 1;
- self.state = .string;
- continue :state_loop;
- },
- else => return error.SyntaxError, // Invalid UTF-8.
- }
- },
- .string_utf8_second_to_last_byte => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- switch (self.input[self.cursor]) {
- 0x80...0xBF => {
- self.cursor += 1;
- self.state = .string_utf8_last_byte;
- continue :state_loop;
- },
- else => return error.SyntaxError, // Invalid UTF-8.
- }
- },
- .string_utf8_second_to_last_byte_guard_against_overlong => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- switch (self.input[self.cursor]) {
- 0xA0...0xBF => {
- self.cursor += 1;
- self.state = .string_utf8_last_byte;
- continue :state_loop;
- },
- else => return error.SyntaxError, // Invalid UTF-8.
- }
- },
- .string_utf8_second_to_last_byte_guard_against_surrogate_half => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- switch (self.input[self.cursor]) {
- 0x80...0x9F => {
- self.cursor += 1;
- self.state = .string_utf8_last_byte;
- continue :state_loop;
- },
- else => return error.SyntaxError, // Invalid UTF-8.
- }
- },
- .string_utf8_third_to_last_byte => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- switch (self.input[self.cursor]) {
- 0x80...0xBF => {
- self.cursor += 1;
- self.state = .string_utf8_second_to_last_byte;
- continue :state_loop;
- },
- else => return error.SyntaxError, // Invalid UTF-8.
- }
- },
- .string_utf8_third_to_last_byte_guard_against_overlong => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- switch (self.input[self.cursor]) {
- 0x90...0xBF => {
- self.cursor += 1;
- self.state = .string_utf8_second_to_last_byte;
- continue :state_loop;
- },
- else => return error.SyntaxError, // Invalid UTF-8.
- }
- },
- .string_utf8_third_to_last_byte_guard_against_too_large => {
- if (self.cursor >= self.input.len) return self.endOfBufferInString();
- switch (self.input[self.cursor]) {
- 0x80...0x8F => {
- self.cursor += 1;
- self.state = .string_utf8_second_to_last_byte;
- continue :state_loop;
- },
- else => return error.SyntaxError, // Invalid UTF-8.
- }
- },
-
- .literal_t => {
- switch (try self.expectByte()) {
- 'r' => {
- self.cursor += 1;
- self.state = .literal_tr;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- },
- .literal_tr => {
- switch (try self.expectByte()) {
- 'u' => {
- self.cursor += 1;
- self.state = .literal_tru;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- },
- .literal_tru => {
- switch (try self.expectByte()) {
- 'e' => {
- self.cursor += 1;
- self.state = .post_value;
- return .true;
- },
- else => return error.SyntaxError,
- }
- },
- .literal_f => {
- switch (try self.expectByte()) {
- 'a' => {
- self.cursor += 1;
- self.state = .literal_fa;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- },
- .literal_fa => {
- switch (try self.expectByte()) {
- 'l' => {
- self.cursor += 1;
- self.state = .literal_fal;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- },
- .literal_fal => {
- switch (try self.expectByte()) {
- 's' => {
- self.cursor += 1;
- self.state = .literal_fals;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- },
- .literal_fals => {
- switch (try self.expectByte()) {
- 'e' => {
- self.cursor += 1;
- self.state = .post_value;
- return .false;
- },
- else => return error.SyntaxError,
- }
- },
- .literal_n => {
- switch (try self.expectByte()) {
- 'u' => {
- self.cursor += 1;
- self.state = .literal_nu;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- },
- .literal_nu => {
- switch (try self.expectByte()) {
- 'l' => {
- self.cursor += 1;
- self.state = .literal_nul;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- },
- .literal_nul => {
- switch (try self.expectByte()) {
- 'l' => {
- self.cursor += 1;
- self.state = .post_value;
- return .null;
- },
- else => return error.SyntaxError,
- }
- },
- }
- unreachable;
- }
- }
-
- /// Seeks ahead in the input until the first byte of the next token (or the end of the input)
- /// determines which type of token will be returned from the next `next*()` call.
- /// This function is idempotent, only advancing past commas, colons, and inter-token whitespace.
- pub fn peekNextTokenType(self: *@This()) PeekError!TokenType {
- state_loop: while (true) {
- switch (self.state) {
- .value => {
- switch (try self.skipWhitespaceExpectByte()) {
- '{' => return .object_begin,
- '[' => return .array_begin,
- '"' => return .string,
- '-', '0'...'9' => return .number,
- 't' => return .true,
- 'f' => return .false,
- 'n' => return .null,
- else => return error.SyntaxError,
- }
- },
-
- .post_value => {
- if (try self.skipWhitespaceCheckEnd()) return .end_of_document;
-
- const c = self.input[self.cursor];
- if (self.string_is_object_key) {
- self.string_is_object_key = false;
- switch (c) {
- ':' => {
- self.cursor += 1;
- self.state = .value;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- }
-
- switch (c) {
- '}' => return .object_end,
- ']' => return .array_end,
- ',' => {
- switch (self.stack.peek()) {
- OBJECT_MODE => {
- self.state = .object_post_comma;
- },
- ARRAY_MODE => {
- self.state = .value;
- },
- }
- self.cursor += 1;
- continue :state_loop;
- },
- else => return error.SyntaxError,
- }
- },
-
- .object_start => {
- switch (try self.skipWhitespaceExpectByte()) {
- '"' => return .string,
- '}' => return .object_end,
- else => return error.SyntaxError,
- }
- },
- .object_post_comma => {
- switch (try self.skipWhitespaceExpectByte()) {
- '"' => return .string,
- else => return error.SyntaxError,
- }
- },
-
- .array_start => {
- switch (try self.skipWhitespaceExpectByte()) {
- ']' => return .array_end,
- else => {
- self.state = .value;
- continue :state_loop;
- },
- }
- },
-
- .number_minus,
- .number_leading_zero,
- .number_int,
- .number_post_dot,
- .number_frac,
- .number_post_e,
- .number_post_e_sign,
- .number_exp,
- => return .number,
-
- .string,
- .string_backslash,
- .string_backslash_u,
- .string_backslash_u_1,
- .string_backslash_u_2,
- .string_backslash_u_3,
- .string_surrogate_half,
- .string_surrogate_half_backslash,
- .string_surrogate_half_backslash_u,
- .string_surrogate_half_backslash_u_1,
- .string_surrogate_half_backslash_u_2,
- .string_surrogate_half_backslash_u_3,
- => return .string,
-
- .string_utf8_last_byte,
- .string_utf8_second_to_last_byte,
- .string_utf8_second_to_last_byte_guard_against_overlong,
- .string_utf8_second_to_last_byte_guard_against_surrogate_half,
- .string_utf8_third_to_last_byte,
- .string_utf8_third_to_last_byte_guard_against_overlong,
- .string_utf8_third_to_last_byte_guard_against_too_large,
- => return .string,
-
- .literal_t,
- .literal_tr,
- .literal_tru,
- => return .true,
- .literal_f,
- .literal_fa,
- .literal_fal,
- .literal_fals,
- => return .false,
- .literal_n,
- .literal_nu,
- .literal_nul,
- => return .null,
- }
- unreachable;
- }
- }
-
- const State = enum {
- value,
- post_value,
-
- object_start,
- object_post_comma,
-
- array_start,
-
- number_minus,
- number_leading_zero,
- number_int,
- number_post_dot,
- number_frac,
- number_post_e,
- number_post_e_sign,
- number_exp,
-
- string,
- string_backslash,
- string_backslash_u,
- string_backslash_u_1,
- string_backslash_u_2,
- string_backslash_u_3,
- string_surrogate_half,
- string_surrogate_half_backslash,
- string_surrogate_half_backslash_u,
- string_surrogate_half_backslash_u_1,
- string_surrogate_half_backslash_u_2,
- string_surrogate_half_backslash_u_3,
-
- // From http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String
- string_utf8_last_byte, // State A
- string_utf8_second_to_last_byte, // State B
- string_utf8_second_to_last_byte_guard_against_overlong, // State C
- string_utf8_second_to_last_byte_guard_against_surrogate_half, // State D
- string_utf8_third_to_last_byte, // State E
- string_utf8_third_to_last_byte_guard_against_overlong, // State F
- string_utf8_third_to_last_byte_guard_against_too_large, // State G
-
- literal_t,
- literal_tr,
- literal_tru,
- literal_f,
- literal_fa,
- literal_fal,
- literal_fals,
- literal_n,
- literal_nu,
- literal_nul,
- };
-
- fn expectByte(self: *const @This()) !u8 {
- if (self.cursor < self.input.len) {
- return self.input[self.cursor];
- }
- // No byte.
- if (self.is_end_of_input) return error.UnexpectedEndOfInput;
- return error.BufferUnderrun;
- }
-
- fn skipWhitespace(self: *@This()) void {
- while (self.cursor < self.input.len) : (self.cursor += 1) {
- switch (self.input[self.cursor]) {
- // Whitespace
- ' ', '\t', '\r' => continue,
- '\n' => {
- if (self.diagnostics) |diag| {
- diag.line_number += 1;
- // This will count the newline itself,
- // which means a straight-forward subtraction will give a 1-based column number.
- diag.line_start_cursor = self.cursor;
- }
- continue;
- },
- else => return,
- }
- }
- }
-
- fn skipWhitespaceExpectByte(self: *@This()) !u8 {
- self.skipWhitespace();
- return self.expectByte();
- }
-
- fn skipWhitespaceCheckEnd(self: *@This()) !bool {
- self.skipWhitespace();
- if (self.cursor >= self.input.len) {
- // End of buffer.
- if (self.is_end_of_input) {
- // End of everything.
- if (self.stackHeight() == 0) {
- // We did it!
- return true;
- }
- return error.UnexpectedEndOfInput;
- }
- return error.BufferUnderrun;
- }
- if (self.stackHeight() == 0) return error.SyntaxError;
- return false;
- }
-
- fn takeValueSlice(self: *@This()) []const u8 {
- const slice = self.input[self.value_start..self.cursor];
- self.value_start = self.cursor;
- return slice;
- }
- fn takeValueSliceMinusTrailingOffset(self: *@This(), trailing_negative_offset: usize) []const u8 {
- // Check if the escape sequence started before the current input buffer.
- // (The algebra here is awkward to avoid unsigned underflow,
- // but it's just making sure the slice on the next line isn't UB.)
- if (self.cursor <= self.value_start + trailing_negative_offset) return "";
- const slice = self.input[self.value_start .. self.cursor - trailing_negative_offset];
- // When trailing_negative_offset is non-zero, setting self.value_start doesn't matter,
- // because we always set it again while emitting the .partial_string_escaped_*.
- self.value_start = self.cursor;
- return slice;
- }
-
- fn endOfBufferInNumber(self: *@This(), allow_end: bool) !Token {
- const slice = self.takeValueSlice();
- if (self.is_end_of_input) {
- if (!allow_end) return error.UnexpectedEndOfInput;
- self.state = .post_value;
- return Token{ .number = slice };
- }
- if (slice.len == 0) return error.BufferUnderrun;
- return Token{ .partial_number = slice };
- }
-
- fn endOfBufferInString(self: *@This()) !Token {
- if (self.is_end_of_input) return error.UnexpectedEndOfInput;
- const slice = self.takeValueSliceMinusTrailingOffset(switch (self.state) {
- // Don't include the escape sequence in the partial string.
- .string_backslash => 1,
- .string_backslash_u => 2,
- .string_backslash_u_1 => 3,
- .string_backslash_u_2 => 4,
- .string_backslash_u_3 => 5,
- .string_surrogate_half => 6,
- .string_surrogate_half_backslash => 7,
- .string_surrogate_half_backslash_u => 8,
- .string_surrogate_half_backslash_u_1 => 9,
- .string_surrogate_half_backslash_u_2 => 10,
- .string_surrogate_half_backslash_u_3 => 11,
-
- // Include everything up to the cursor otherwise.
- .string,
- .string_utf8_last_byte,
- .string_utf8_second_to_last_byte,
- .string_utf8_second_to_last_byte_guard_against_overlong,
- .string_utf8_second_to_last_byte_guard_against_surrogate_half,
- .string_utf8_third_to_last_byte,
- .string_utf8_third_to_last_byte_guard_against_overlong,
- .string_utf8_third_to_last_byte_guard_against_too_large,
- => 0,
-
- else => unreachable,
- });
- if (slice.len == 0) return error.BufferUnderrun;
- return Token{ .partial_string = slice };
- }
-
- fn partialStringCodepoint(code_point: u21) Token {
- var buf: [4]u8 = undefined;
- switch (std.unicode.utf8Encode(code_point, &buf) catch unreachable) {
- 1 => return Token{ .partial_string_escaped_1 = buf[0..1].* },
- 2 => return Token{ .partial_string_escaped_2 = buf[0..2].* },
- 3 => return Token{ .partial_string_escaped_3 = buf[0..3].* },
- 4 => return Token{ .partial_string_escaped_4 = buf[0..4].* },
- else => unreachable,
- }
- }
-};
-
-const OBJECT_MODE = 0;
-const ARRAY_MODE = 1;
-
-fn appendSlice(list: *std.ArrayList(u8), buf: []const u8, max_value_len: usize) !void {
- const new_len = std.math.add(usize, list.items.len, buf.len) catch return error.ValueTooLong;
- if (new_len > max_value_len) return error.ValueTooLong;
- try list.appendSlice(buf);
-}
-
-/// For the slice you get from a `Token.number` or `Token.allocated_number`,
-/// this function returns true if the number doesn't contain any fraction or exponent components, and is not `-0`.
-/// Note, the numeric value encoded by the value may still be an integer, such as `1.0`.
-/// This function is meant to give a hint about whether integer parsing or float parsing should be used on the value.
-/// This function will not give meaningful results on non-numeric input.
-pub fn isNumberFormattedLikeAnInteger(value: []const u8) bool {
- if (std.mem.eql(u8, value, "-0")) return false;
- return std.mem.indexOfAny(u8, value, ".eE") == null;
-}
-
-test {
- _ = @import("./scanner_test.zig");
-}
lib/std/json/Scanner.zig
@@ -0,0 +1,1767 @@
+//! The lowest level parsing API in this package;
+//! supports streaming input with a low memory footprint.
+//! The memory requirement is `O(d)` where d is the nesting depth of `[]` or `{}` containers in the input.
+//! Specifically `d/8` bytes are required for this purpose,
+//! with some extra buffer according to the implementation of `std.ArrayList`.
+//!
+//! This scanner can emit partial tokens; see `std.json.Token`.
+//! The input to this class is a sequence of input buffers that you must supply one at a time.
+//! Call `feedInput()` with the first buffer, then call `next()` repeatedly until `error.BufferUnderrun` is returned.
+//! Then call `feedInput()` again and so forth.
+//! Call `endInput()` when the last input buffer has been given to `feedInput()`, either immediately after calling `feedInput()`,
+//! or when `error.BufferUnderrun` requests more data and there is no more.
+//! Be sure to call `next()` after calling `endInput()` until `Token.end_of_document` has been returned.
+//!
+//! Notes on standards compliance: https://datatracker.ietf.org/doc/html/rfc8259
+//! * RFC 8259 requires JSON documents be valid UTF-8,
+//! but makes an allowance for systems that are "part of a closed ecosystem".
+//! I have no idea what that's supposed to mean in the context of a standard specification.
+//! This implementation requires inputs to be valid UTF-8.
+//! * RFC 8259 contradicts itself regarding whether lowercase is allowed in \u hex digits,
+//! but this is probably a bug in the spec, and it's clear that lowercase is meant to be allowed.
+//! (RFC 5234 defines HEXDIG to only allow uppercase.)
+//! * When RFC 8259 refers to a "character", I assume they really mean a "Unicode scalar value".
+//! See http://www.unicode.org/glossary/#unicode_scalar_value .
+//! * RFC 8259 doesn't explicitly disallow unpaired surrogate halves in \u escape sequences,
+//! but vaguely implies that \u escapes are for encoding Unicode "characters" (i.e. Unicode scalar values?),
+//! which would mean that unpaired surrogate halves are forbidden.
+//! By contrast ECMA-404 (a competing(/compatible?) JSON standard, which JavaScript's JSON.parse() conforms to)
+//! explicitly allows unpaired surrogate halves.
+//! This implementation forbids unpaired surrogate halves in \u sequences.
+//! If a high surrogate half appears in a \u sequence,
+//! then a low surrogate half must immediately follow in \u notation.
+//! * RFC 8259 allows implementations to "accept non-JSON forms or extensions".
+//! This implementation does not accept any of that.
+//! * RFC 8259 allows implementations to put limits on "the size of texts",
+//! "the maximum depth of nesting", "the range and precision of numbers",
+//! and "the length and character contents of strings".
+//! This low-level implementation does not limit these,
+//! except where noted above, and except that nesting depth requires memory allocation.
+//! Note that this low-level API does not interpret numbers numerically,
+//! but simply emits their source form for some higher level code to make sense of.
+//! * This low-level implementation allows duplicate object keys,
+//! and key/value pairs are emitted in the order they appear in the input.
+
+const Scanner = @This();
+const std = @import("std");
+
+const Allocator = std.mem.Allocator;
+const ArrayList = std.ArrayList;
+const assert = std.debug.assert;
+const BitStack = std.BitStack;
+
+state: State = .value,
+string_is_object_key: bool = false,
+stack: BitStack,
+value_start: usize = undefined,
+utf16_code_units: [2]u16 = undefined,
+
+input: []const u8 = "",
+cursor: usize = 0,
+is_end_of_input: bool = false,
+diagnostics: ?*Diagnostics = null,
+
+/// The allocator is only used to track `[]` and `{}` nesting levels.
+pub fn initStreaming(allocator: Allocator) @This() {
+ return .{
+ .stack = BitStack.init(allocator),
+ };
+}
+/// Use this if your input is a single slice.
+/// This is effectively equivalent to:
+/// ```
+/// initStreaming(allocator);
+/// feedInput(complete_input);
+/// endInput();
+/// ```
+pub fn initCompleteInput(allocator: Allocator, complete_input: []const u8) @This() {
+ return .{
+ .stack = BitStack.init(allocator),
+ .input = complete_input,
+ .is_end_of_input = true,
+ };
+}
+pub fn deinit(self: *@This()) void {
+ self.stack.deinit();
+ self.* = undefined;
+}
+
+pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void {
+ diagnostics.cursor_pointer = &self.cursor;
+ self.diagnostics = diagnostics;
+}
+
+/// Call this whenever you get `error.BufferUnderrun` from `next()`.
+/// When there is no more input to provide, call `endInput()`.
+pub fn feedInput(self: *@This(), input: []const u8) void {
+ assert(self.cursor == self.input.len); // Not done with the last input slice.
+ if (self.diagnostics) |diag| {
+ diag.total_bytes_before_current_input += self.input.len;
+ // This usually goes "negative" to measure how far before the beginning
+ // of the new buffer the current line started.
+ diag.line_start_cursor -%= self.cursor;
+ }
+ self.input = input;
+ self.cursor = 0;
+ self.value_start = 0;
+}
+/// Call this when you will no longer call `feedInput()` anymore.
+/// This can be called either immediately after the last `feedInput()`,
+/// or at any time afterward, such as when getting `error.BufferUnderrun` from `next()`.
+/// Don't forget to call `next*()` after `endInput()` until you get `.end_of_document`.
+pub fn endInput(self: *@This()) void {
+ self.is_end_of_input = true;
+}
+
+pub const NextError = Error || Allocator.Error || error{BufferUnderrun};
+pub const AllocError = Error || Allocator.Error || error{ValueTooLong};
+pub const PeekError = Error || error{BufferUnderrun};
+pub const SkipError = Error || Allocator.Error;
+pub const AllocIntoArrayListError = AllocError || error{BufferUnderrun};
+
+/// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);`
+/// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
+/// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
+pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) AllocError!Token {
+ return self.nextAllocMax(allocator, when, default_max_value_len);
+}
+
+/// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
+/// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
+pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token {
+ assert(self.is_end_of_input); // This function is not available in streaming mode.
+ const token_type = self.peekNextTokenType() catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ };
+ switch (token_type) {
+ .number, .string => {
+ var value_list = ArrayList(u8).init(allocator);
+ errdefer {
+ value_list.deinit();
+ }
+ if (self.allocNextIntoArrayListMax(&value_list, when, max_value_len) catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ }) |slice| {
+ return if (token_type == .number)
+ Token{ .number = slice }
+ else
+ Token{ .string = slice };
+ } else {
+ return if (token_type == .number)
+ Token{ .allocated_number = try value_list.toOwnedSlice() }
+ else
+ Token{ .allocated_string = try value_list.toOwnedSlice() };
+ }
+ },
+
+ // Simple tokens never alloc.
+ .object_begin,
+ .object_end,
+ .array_begin,
+ .array_end,
+ .true,
+ .false,
+ .null,
+ .end_of_document,
+ => return self.next() catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ },
+ }
+}
+
+/// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);`
+pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) AllocIntoArrayListError!?[]const u8 {
+ return self.allocNextIntoArrayListMax(value_list, when, default_max_value_len);
+}
+/// The next token type must be either `.number` or `.string`. See `peekNextTokenType()`.
+/// When allocation is not necessary with `.alloc_if_needed`,
+/// this method returns the content slice from the input buffer, and `value_list` is not touched.
+/// When allocation is necessary or with `.alloc_always`, this method concatenates partial tokens into the given `value_list`,
+/// and returns `null` once the final `.number` or `.string` token has been written into it.
+/// In case of an `error.BufferUnderrun`, partial values will be left in the given value_list.
+/// The given `value_list` is never reset by this method, so an `error.BufferUnderrun` situation
+/// can be resumed by passing the same array list in again.
+/// This method does not indicate whether the token content being returned is for a `.number` or `.string` token type;
+/// the caller of this method is expected to know which type of token is being processed.
+pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocIntoArrayListError!?[]const u8 {
+ while (true) {
+ const token = try self.next();
+ switch (token) {
+ // Accumulate partial values.
+ .partial_number, .partial_string => |slice| {
+ try appendSlice(value_list, slice, max_value_len);
+ },
+ .partial_string_escaped_1 => |buf| {
+ try appendSlice(value_list, buf[0..], max_value_len);
+ },
+ .partial_string_escaped_2 => |buf| {
+ try appendSlice(value_list, buf[0..], max_value_len);
+ },
+ .partial_string_escaped_3 => |buf| {
+ try appendSlice(value_list, buf[0..], max_value_len);
+ },
+ .partial_string_escaped_4 => |buf| {
+ try appendSlice(value_list, buf[0..], max_value_len);
+ },
+
+ // Return complete values.
+ .number => |slice| {
+ if (when == .alloc_if_needed and value_list.items.len == 0) {
+ // No alloc necessary.
+ return slice;
+ }
+ try appendSlice(value_list, slice, max_value_len);
+ // The token is complete.
+ return null;
+ },
+ .string => |slice| {
+ if (when == .alloc_if_needed and value_list.items.len == 0) {
+ // No alloc necessary.
+ return slice;
+ }
+ try appendSlice(value_list, slice, max_value_len);
+ // The token is complete.
+ return null;
+ },
+
+ .object_begin,
+ .object_end,
+ .array_begin,
+ .array_end,
+ .true,
+ .false,
+ .null,
+ .end_of_document,
+ => unreachable, // Only .number and .string token types are allowed here. Check peekNextTokenType() before calling this.
+
+ .allocated_number, .allocated_string => unreachable,
+ }
+ }
+}
+
+/// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
+/// If the next token type is `.object_begin` or `.array_begin`,
+/// this function calls `next()` repeatedly until the corresponding `.object_end` or `.array_end` is found.
+/// If the next token type is `.number` or `.string`,
+/// this function calls `next()` repeatedly until the (non `.partial_*`) `.number` or `.string` token is found.
+/// If the next token type is `.true`, `.false`, or `.null`, this function calls `next()` once.
+/// The next token type must not be `.object_end`, `.array_end`, or `.end_of_document`;
+/// see `peekNextTokenType()`.
+pub fn skipValue(self: *@This()) SkipError!void {
+ assert(self.is_end_of_input); // This function is not available in streaming mode.
+ switch (self.peekNextTokenType() catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ }) {
+ .object_begin, .array_begin => {
+ self.skipUntilStackHeight(self.stackHeight()) catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ };
+ },
+ .number, .string => {
+ while (true) {
+ switch (self.next() catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ }) {
+ .partial_number,
+ .partial_string,
+ .partial_string_escaped_1,
+ .partial_string_escaped_2,
+ .partial_string_escaped_3,
+ .partial_string_escaped_4,
+ => continue,
+
+ .number, .string => break,
+
+ else => unreachable,
+ }
+ }
+ },
+ .true, .false, .null => {
+ _ = self.next() catch |e| switch (e) {
+ error.BufferUnderrun => unreachable,
+ else => |err| return err,
+ };
+ },
+
+ .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token.
+ }
+}
+
+/// Skip tokens until an `.object_end` or `.array_end` token results in a `stackHeight()` equal the given stack height.
+/// Unlike `skipValue()`, this function is available in streaming mode.
+pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: usize) NextError!void {
+ while (true) {
+ switch (try self.next()) {
+ .object_end, .array_end => {
+ if (self.stackHeight() == terminal_stack_height) break;
+ },
+ .end_of_document => unreachable,
+ else => continue,
+ }
+ }
+}
+
+/// The depth of `{}` or `[]` nesting levels at the current position.
+pub fn stackHeight(self: *const @This()) usize {
+ return self.stack.bit_len;
+}
+
+/// Preallocate memory to hold the given number of nesting levels.
+/// `stackHeight()` up to the given number will not cause allocations.
+pub fn ensureTotalStackCapacity(self: *@This(), height: usize) Allocator.Error!void {
+ try self.stack.ensureTotalCapacity(height);
+}
+
+/// See `std.json.Token` for documentation of this function.
+pub fn next(self: *@This()) NextError!Token {
+ state_loop: while (true) {
+ switch (self.state) {
+ .value => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ // Object, Array
+ '{' => {
+ try self.stack.push(OBJECT_MODE);
+ self.cursor += 1;
+ self.state = .object_start;
+ return .object_begin;
+ },
+ '[' => {
+ try self.stack.push(ARRAY_MODE);
+ self.cursor += 1;
+ self.state = .array_start;
+ return .array_begin;
+ },
+
+ // String
+ '"' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ continue :state_loop;
+ },
+
+ // Number
+ '1'...'9' => {
+ self.value_start = self.cursor;
+ self.cursor += 1;
+ self.state = .number_int;
+ continue :state_loop;
+ },
+ '0' => {
+ self.value_start = self.cursor;
+ self.cursor += 1;
+ self.state = .number_leading_zero;
+ continue :state_loop;
+ },
+ '-' => {
+ self.value_start = self.cursor;
+ self.cursor += 1;
+ self.state = .number_minus;
+ continue :state_loop;
+ },
+
+ // literal values
+ 't' => {
+ self.cursor += 1;
+ self.state = .literal_t;
+ continue :state_loop;
+ },
+ 'f' => {
+ self.cursor += 1;
+ self.state = .literal_f;
+ continue :state_loop;
+ },
+ 'n' => {
+ self.cursor += 1;
+ self.state = .literal_n;
+ continue :state_loop;
+ },
+
+ else => return error.SyntaxError,
+ }
+ },
+
+ .post_value => {
+ if (try self.skipWhitespaceCheckEnd()) return .end_of_document;
+
+ const c = self.input[self.cursor];
+ if (self.string_is_object_key) {
+ self.string_is_object_key = false;
+ switch (c) {
+ ':' => {
+ self.cursor += 1;
+ self.state = .value;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ }
+
+ switch (c) {
+ '}' => {
+ if (self.stack.pop() != OBJECT_MODE) return error.SyntaxError;
+ self.cursor += 1;
+ // stay in .post_value state.
+ return .object_end;
+ },
+ ']' => {
+ if (self.stack.pop() != ARRAY_MODE) return error.SyntaxError;
+ self.cursor += 1;
+ // stay in .post_value state.
+ return .array_end;
+ },
+ ',' => {
+ switch (self.stack.peek()) {
+ OBJECT_MODE => {
+ self.state = .object_post_comma;
+ },
+ ARRAY_MODE => {
+ self.state = .value;
+ },
+ }
+ self.cursor += 1;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+
+ .object_start => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ '"' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ self.string_is_object_key = true;
+ continue :state_loop;
+ },
+ '}' => {
+ self.cursor += 1;
+ _ = self.stack.pop();
+ self.state = .post_value;
+ return .object_end;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .object_post_comma => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ '"' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ self.string_is_object_key = true;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+
+ .array_start => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ ']' => {
+ self.cursor += 1;
+ _ = self.stack.pop();
+ self.state = .post_value;
+ return .array_end;
+ },
+ else => {
+ self.state = .value;
+ continue :state_loop;
+ },
+ }
+ },
+
+ .number_minus => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
+ switch (self.input[self.cursor]) {
+ '0' => {
+ self.cursor += 1;
+ self.state = .number_leading_zero;
+ continue :state_loop;
+ },
+ '1'...'9' => {
+ self.cursor += 1;
+ self.state = .number_int;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .number_leading_zero => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInNumber(true);
+ switch (self.input[self.cursor]) {
+ '.' => {
+ self.cursor += 1;
+ self.state = .number_post_dot;
+ continue :state_loop;
+ },
+ 'e', 'E' => {
+ self.cursor += 1;
+ self.state = .number_post_e;
+ continue :state_loop;
+ },
+ else => {
+ self.state = .post_value;
+ return Token{ .number = self.takeValueSlice() };
+ },
+ }
+ },
+ .number_int => {
+ while (self.cursor < self.input.len) : (self.cursor += 1) {
+ switch (self.input[self.cursor]) {
+ '0'...'9' => continue,
+ '.' => {
+ self.cursor += 1;
+ self.state = .number_post_dot;
+ continue :state_loop;
+ },
+ 'e', 'E' => {
+ self.cursor += 1;
+ self.state = .number_post_e;
+ continue :state_loop;
+ },
+ else => {
+ self.state = .post_value;
+ return Token{ .number = self.takeValueSlice() };
+ },
+ }
+ }
+ return self.endOfBufferInNumber(true);
+ },
+ .number_post_dot => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
+ switch (self.input[self.cursor]) {
+ '0'...'9' => {
+ self.cursor += 1;
+ self.state = .number_frac;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .number_frac => {
+ while (self.cursor < self.input.len) : (self.cursor += 1) {
+ switch (self.input[self.cursor]) {
+ '0'...'9' => continue,
+ 'e', 'E' => {
+ self.cursor += 1;
+ self.state = .number_post_e;
+ continue :state_loop;
+ },
+ else => {
+ self.state = .post_value;
+ return Token{ .number = self.takeValueSlice() };
+ },
+ }
+ }
+ return self.endOfBufferInNumber(true);
+ },
+ .number_post_e => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
+ switch (self.input[self.cursor]) {
+ '0'...'9' => {
+ self.cursor += 1;
+ self.state = .number_exp;
+ continue :state_loop;
+ },
+ '+', '-' => {
+ self.cursor += 1;
+ self.state = .number_post_e_sign;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .number_post_e_sign => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
+ switch (self.input[self.cursor]) {
+ '0'...'9' => {
+ self.cursor += 1;
+ self.state = .number_exp;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .number_exp => {
+ while (self.cursor < self.input.len) : (self.cursor += 1) {
+ switch (self.input[self.cursor]) {
+ '0'...'9' => continue,
+ else => {
+ self.state = .post_value;
+ return Token{ .number = self.takeValueSlice() };
+ },
+ }
+ }
+ return self.endOfBufferInNumber(true);
+ },
+
+ .string => {
+ while (self.cursor < self.input.len) : (self.cursor += 1) {
+ switch (self.input[self.cursor]) {
+ 0...0x1f => return error.SyntaxError, // Bare ASCII control code in string.
+
+ // ASCII plain text.
+ 0x20...('"' - 1), ('"' + 1)...('\\' - 1), ('\\' + 1)...0x7F => continue,
+
+ // Special characters.
+ '"' => {
+ const result = Token{ .string = self.takeValueSlice() };
+ self.cursor += 1;
+ self.state = .post_value;
+ return result;
+ },
+ '\\' => {
+ const slice = self.takeValueSlice();
+ self.cursor += 1;
+ self.state = .string_backslash;
+ if (slice.len > 0) return Token{ .partial_string = slice };
+ continue :state_loop;
+ },
+
+ // UTF-8 validation.
+ // See http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String
+ 0xC2...0xDF => {
+ self.cursor += 1;
+ self.state = .string_utf8_last_byte;
+ continue :state_loop;
+ },
+ 0xE0 => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte_guard_against_overlong;
+ continue :state_loop;
+ },
+ 0xE1...0xEC, 0xEE...0xEF => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte;
+ continue :state_loop;
+ },
+ 0xED => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte_guard_against_surrogate_half;
+ continue :state_loop;
+ },
+ 0xF0 => {
+ self.cursor += 1;
+ self.state = .string_utf8_third_to_last_byte_guard_against_overlong;
+ continue :state_loop;
+ },
+ 0xF1...0xF3 => {
+ self.cursor += 1;
+ self.state = .string_utf8_third_to_last_byte;
+ continue :state_loop;
+ },
+ 0xF4 => {
+ self.cursor += 1;
+ self.state = .string_utf8_third_to_last_byte_guard_against_too_large;
+ continue :state_loop;
+ },
+ 0x80...0xC1, 0xF5...0xFF => return error.SyntaxError, // Invalid UTF-8.
+ }
+ }
+ if (self.is_end_of_input) return error.UnexpectedEndOfInput;
+ const slice = self.takeValueSlice();
+ if (slice.len > 0) return Token{ .partial_string = slice };
+ return error.BufferUnderrun;
+ },
+ .string_backslash => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ '"', '\\', '/' => {
+ // Since these characters now represent themselves literally,
+ // we can simply begin the next plaintext slice here.
+ self.value_start = self.cursor;
+ self.cursor += 1;
+ self.state = .string;
+ continue :state_loop;
+ },
+ 'b' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ return Token{ .partial_string_escaped_1 = [_]u8{0x08} };
+ },
+ 'f' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ return Token{ .partial_string_escaped_1 = [_]u8{0x0c} };
+ },
+ 'n' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ return Token{ .partial_string_escaped_1 = [_]u8{'\n'} };
+ },
+ 'r' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ return Token{ .partial_string_escaped_1 = [_]u8{'\r'} };
+ },
+ 't' => {
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ return Token{ .partial_string_escaped_1 = [_]u8{'\t'} };
+ },
+ 'u' => {
+ self.cursor += 1;
+ self.state = .string_backslash_u;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .string_backslash_u => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
+ switch (c) {
+ '0'...'9' => {
+ self.utf16_code_units[0] = @as(u16, c - '0') << 12;
+ },
+ 'A'...'F' => {
+ self.utf16_code_units[0] = @as(u16, c - 'A' + 10) << 12;
+ },
+ 'a'...'f' => {
+ self.utf16_code_units[0] = @as(u16, c - 'a' + 10) << 12;
+ },
+ else => return error.SyntaxError,
+ }
+ self.cursor += 1;
+ self.state = .string_backslash_u_1;
+ continue :state_loop;
+ },
+ .string_backslash_u_1 => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
+ switch (c) {
+ '0'...'9' => {
+ self.utf16_code_units[0] |= @as(u16, c - '0') << 8;
+ },
+ 'A'...'F' => {
+ self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 8;
+ },
+ 'a'...'f' => {
+ self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 8;
+ },
+ else => return error.SyntaxError,
+ }
+ self.cursor += 1;
+ self.state = .string_backslash_u_2;
+ continue :state_loop;
+ },
+ .string_backslash_u_2 => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
+ switch (c) {
+ '0'...'9' => {
+ self.utf16_code_units[0] |= @as(u16, c - '0') << 4;
+ },
+ 'A'...'F' => {
+ self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 4;
+ },
+ 'a'...'f' => {
+ self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 4;
+ },
+ else => return error.SyntaxError,
+ }
+ self.cursor += 1;
+ self.state = .string_backslash_u_3;
+ continue :state_loop;
+ },
+ .string_backslash_u_3 => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
+ switch (c) {
+ '0'...'9' => {
+ self.utf16_code_units[0] |= c - '0';
+ },
+ 'A'...'F' => {
+ self.utf16_code_units[0] |= c - 'A' + 10;
+ },
+ 'a'...'f' => {
+ self.utf16_code_units[0] |= c - 'a' + 10;
+ },
+ else => return error.SyntaxError,
+ }
+ self.cursor += 1;
+ if (std.unicode.utf16IsHighSurrogate(self.utf16_code_units[0])) {
+ self.state = .string_surrogate_half;
+ continue :state_loop;
+ } else if (std.unicode.utf16IsLowSurrogate(self.utf16_code_units[0])) {
+ return error.SyntaxError; // Unexpected low surrogate half.
+ } else {
+ self.value_start = self.cursor;
+ self.state = .string;
+ return partialStringCodepoint(self.utf16_code_units[0]);
+ }
+ },
+ .string_surrogate_half => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ '\\' => {
+ self.cursor += 1;
+ self.state = .string_surrogate_half_backslash;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Expected low surrogate half.
+ }
+ },
+ .string_surrogate_half_backslash => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 'u' => {
+ self.cursor += 1;
+ self.state = .string_surrogate_half_backslash_u;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Expected low surrogate half.
+ }
+ },
+ .string_surrogate_half_backslash_u => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 'D', 'd' => {
+ self.cursor += 1;
+ self.utf16_code_units[1] = 0xD << 12;
+ self.state = .string_surrogate_half_backslash_u_1;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Expected low surrogate half.
+ }
+ },
+ .string_surrogate_half_backslash_u_1 => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
+ switch (c) {
+ 'C'...'F' => {
+ self.cursor += 1;
+ self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 8;
+ self.state = .string_surrogate_half_backslash_u_2;
+ continue :state_loop;
+ },
+ 'c'...'f' => {
+ self.cursor += 1;
+ self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 8;
+ self.state = .string_surrogate_half_backslash_u_2;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Expected low surrogate half.
+ }
+ },
+ .string_surrogate_half_backslash_u_2 => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
+ switch (c) {
+ '0'...'9' => {
+ self.cursor += 1;
+ self.utf16_code_units[1] |= @as(u16, c - '0') << 4;
+ self.state = .string_surrogate_half_backslash_u_3;
+ continue :state_loop;
+ },
+ 'A'...'F' => {
+ self.cursor += 1;
+ self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 4;
+ self.state = .string_surrogate_half_backslash_u_3;
+ continue :state_loop;
+ },
+ 'a'...'f' => {
+ self.cursor += 1;
+ self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 4;
+ self.state = .string_surrogate_half_backslash_u_3;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .string_surrogate_half_backslash_u_3 => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ const c = self.input[self.cursor];
+ switch (c) {
+ '0'...'9' => {
+ self.utf16_code_units[1] |= c - '0';
+ },
+ 'A'...'F' => {
+ self.utf16_code_units[1] |= c - 'A' + 10;
+ },
+ 'a'...'f' => {
+ self.utf16_code_units[1] |= c - 'a' + 10;
+ },
+ else => return error.SyntaxError,
+ }
+ self.cursor += 1;
+ self.value_start = self.cursor;
+ self.state = .string;
+ const code_point = std.unicode.utf16DecodeSurrogatePair(&self.utf16_code_units) catch unreachable;
+ return partialStringCodepoint(code_point);
+ },
+
+ .string_utf8_last_byte => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 0x80...0xBF => {
+ self.cursor += 1;
+ self.state = .string;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_second_to_last_byte => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 0x80...0xBF => {
+ self.cursor += 1;
+ self.state = .string_utf8_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_second_to_last_byte_guard_against_overlong => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 0xA0...0xBF => {
+ self.cursor += 1;
+ self.state = .string_utf8_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_second_to_last_byte_guard_against_surrogate_half => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 0x80...0x9F => {
+ self.cursor += 1;
+ self.state = .string_utf8_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_third_to_last_byte => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 0x80...0xBF => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_third_to_last_byte_guard_against_overlong => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 0x90...0xBF => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+ .string_utf8_third_to_last_byte_guard_against_too_large => {
+ if (self.cursor >= self.input.len) return self.endOfBufferInString();
+ switch (self.input[self.cursor]) {
+ 0x80...0x8F => {
+ self.cursor += 1;
+ self.state = .string_utf8_second_to_last_byte;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError, // Invalid UTF-8.
+ }
+ },
+
+ .literal_t => {
+ switch (try self.expectByte()) {
+ 'r' => {
+ self.cursor += 1;
+ self.state = .literal_tr;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_tr => {
+ switch (try self.expectByte()) {
+ 'u' => {
+ self.cursor += 1;
+ self.state = .literal_tru;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_tru => {
+ switch (try self.expectByte()) {
+ 'e' => {
+ self.cursor += 1;
+ self.state = .post_value;
+ return .true;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_f => {
+ switch (try self.expectByte()) {
+ 'a' => {
+ self.cursor += 1;
+ self.state = .literal_fa;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_fa => {
+ switch (try self.expectByte()) {
+ 'l' => {
+ self.cursor += 1;
+ self.state = .literal_fal;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_fal => {
+ switch (try self.expectByte()) {
+ 's' => {
+ self.cursor += 1;
+ self.state = .literal_fals;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_fals => {
+ switch (try self.expectByte()) {
+ 'e' => {
+ self.cursor += 1;
+ self.state = .post_value;
+ return .false;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_n => {
+ switch (try self.expectByte()) {
+ 'u' => {
+ self.cursor += 1;
+ self.state = .literal_nu;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_nu => {
+ switch (try self.expectByte()) {
+ 'l' => {
+ self.cursor += 1;
+ self.state = .literal_nul;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ .literal_nul => {
+ switch (try self.expectByte()) {
+ 'l' => {
+ self.cursor += 1;
+ self.state = .post_value;
+ return .null;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+ }
+ unreachable;
+ }
+}
+
+/// Seeks ahead in the input until the first byte of the next token (or the end of the input)
+/// determines which type of token will be returned from the next `next*()` call.
+/// This function is idempotent, only advancing past commas, colons, and inter-token whitespace.
+pub fn peekNextTokenType(self: *@This()) PeekError!TokenType {
+ state_loop: while (true) {
+ switch (self.state) {
+ .value => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ '{' => return .object_begin,
+ '[' => return .array_begin,
+ '"' => return .string,
+ '-', '0'...'9' => return .number,
+ 't' => return .true,
+ 'f' => return .false,
+ 'n' => return .null,
+ else => return error.SyntaxError,
+ }
+ },
+
+ .post_value => {
+ if (try self.skipWhitespaceCheckEnd()) return .end_of_document;
+
+ const c = self.input[self.cursor];
+ if (self.string_is_object_key) {
+ self.string_is_object_key = false;
+ switch (c) {
+ ':' => {
+ self.cursor += 1;
+ self.state = .value;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ }
+
+ switch (c) {
+ '}' => return .object_end,
+ ']' => return .array_end,
+ ',' => {
+ switch (self.stack.peek()) {
+ OBJECT_MODE => {
+ self.state = .object_post_comma;
+ },
+ ARRAY_MODE => {
+ self.state = .value;
+ },
+ }
+ self.cursor += 1;
+ continue :state_loop;
+ },
+ else => return error.SyntaxError,
+ }
+ },
+
+ .object_start => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ '"' => return .string,
+ '}' => return .object_end,
+ else => return error.SyntaxError,
+ }
+ },
+ .object_post_comma => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ '"' => return .string,
+ else => return error.SyntaxError,
+ }
+ },
+
+ .array_start => {
+ switch (try self.skipWhitespaceExpectByte()) {
+ ']' => return .array_end,
+ else => {
+ self.state = .value;
+ continue :state_loop;
+ },
+ }
+ },
+
+ .number_minus,
+ .number_leading_zero,
+ .number_int,
+ .number_post_dot,
+ .number_frac,
+ .number_post_e,
+ .number_post_e_sign,
+ .number_exp,
+ => return .number,
+
+ .string,
+ .string_backslash,
+ .string_backslash_u,
+ .string_backslash_u_1,
+ .string_backslash_u_2,
+ .string_backslash_u_3,
+ .string_surrogate_half,
+ .string_surrogate_half_backslash,
+ .string_surrogate_half_backslash_u,
+ .string_surrogate_half_backslash_u_1,
+ .string_surrogate_half_backslash_u_2,
+ .string_surrogate_half_backslash_u_3,
+ => return .string,
+
+ .string_utf8_last_byte,
+ .string_utf8_second_to_last_byte,
+ .string_utf8_second_to_last_byte_guard_against_overlong,
+ .string_utf8_second_to_last_byte_guard_against_surrogate_half,
+ .string_utf8_third_to_last_byte,
+ .string_utf8_third_to_last_byte_guard_against_overlong,
+ .string_utf8_third_to_last_byte_guard_against_too_large,
+ => return .string,
+
+ .literal_t,
+ .literal_tr,
+ .literal_tru,
+ => return .true,
+ .literal_f,
+ .literal_fa,
+ .literal_fal,
+ .literal_fals,
+ => return .false,
+ .literal_n,
+ .literal_nu,
+ .literal_nul,
+ => return .null,
+ }
+ unreachable;
+ }
+}
+
+const State = enum {
+ value,
+ post_value,
+
+ object_start,
+ object_post_comma,
+
+ array_start,
+
+ number_minus,
+ number_leading_zero,
+ number_int,
+ number_post_dot,
+ number_frac,
+ number_post_e,
+ number_post_e_sign,
+ number_exp,
+
+ string,
+ string_backslash,
+ string_backslash_u,
+ string_backslash_u_1,
+ string_backslash_u_2,
+ string_backslash_u_3,
+ string_surrogate_half,
+ string_surrogate_half_backslash,
+ string_surrogate_half_backslash_u,
+ string_surrogate_half_backslash_u_1,
+ string_surrogate_half_backslash_u_2,
+ string_surrogate_half_backslash_u_3,
+
+ // From http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String
+ string_utf8_last_byte, // State A
+ string_utf8_second_to_last_byte, // State B
+ string_utf8_second_to_last_byte_guard_against_overlong, // State C
+ string_utf8_second_to_last_byte_guard_against_surrogate_half, // State D
+ string_utf8_third_to_last_byte, // State E
+ string_utf8_third_to_last_byte_guard_against_overlong, // State F
+ string_utf8_third_to_last_byte_guard_against_too_large, // State G
+
+ literal_t,
+ literal_tr,
+ literal_tru,
+ literal_f,
+ literal_fa,
+ literal_fal,
+ literal_fals,
+ literal_n,
+ literal_nu,
+ literal_nul,
+};
+
+fn expectByte(self: *const @This()) !u8 {
+ if (self.cursor < self.input.len) {
+ return self.input[self.cursor];
+ }
+ // No byte.
+ if (self.is_end_of_input) return error.UnexpectedEndOfInput;
+ return error.BufferUnderrun;
+}
+
+fn skipWhitespace(self: *@This()) void {
+ while (self.cursor < self.input.len) : (self.cursor += 1) {
+ switch (self.input[self.cursor]) {
+ // Whitespace
+ ' ', '\t', '\r' => continue,
+ '\n' => {
+ if (self.diagnostics) |diag| {
+ diag.line_number += 1;
+ // This will count the newline itself,
+                    // which means a straightforward subtraction will give a 1-based column number.
+ diag.line_start_cursor = self.cursor;
+ }
+ continue;
+ },
+ else => return,
+ }
+ }
+}
+
+fn skipWhitespaceExpectByte(self: *@This()) !u8 {
+ self.skipWhitespace();
+ return self.expectByte();
+}
+
+fn skipWhitespaceCheckEnd(self: *@This()) !bool {
+ self.skipWhitespace();
+ if (self.cursor >= self.input.len) {
+ // End of buffer.
+ if (self.is_end_of_input) {
+ // End of everything.
+ if (self.stackHeight() == 0) {
+ // We did it!
+ return true;
+ }
+ return error.UnexpectedEndOfInput;
+ }
+ return error.BufferUnderrun;
+ }
+ if (self.stackHeight() == 0) return error.SyntaxError;
+ return false;
+}
+
+fn takeValueSlice(self: *@This()) []const u8 {
+ const slice = self.input[self.value_start..self.cursor];
+ self.value_start = self.cursor;
+ return slice;
+}
+fn takeValueSliceMinusTrailingOffset(self: *@This(), trailing_negative_offset: usize) []const u8 {
+ // Check if the escape sequence started before the current input buffer.
+ // (The algebra here is awkward to avoid unsigned underflow,
+ // but it's just making sure the slice on the next line isn't UB.)
+ if (self.cursor <= self.value_start + trailing_negative_offset) return "";
+ const slice = self.input[self.value_start .. self.cursor - trailing_negative_offset];
+ // When trailing_negative_offset is non-zero, setting self.value_start doesn't matter,
+ // because we always set it again while emitting the .partial_string_escaped_*.
+ self.value_start = self.cursor;
+ return slice;
+}
+
+fn endOfBufferInNumber(self: *@This(), allow_end: bool) !Token {
+ const slice = self.takeValueSlice();
+ if (self.is_end_of_input) {
+ if (!allow_end) return error.UnexpectedEndOfInput;
+ self.state = .post_value;
+ return Token{ .number = slice };
+ }
+ if (slice.len == 0) return error.BufferUnderrun;
+ return Token{ .partial_number = slice };
+}
+
+fn endOfBufferInString(self: *@This()) !Token {
+ if (self.is_end_of_input) return error.UnexpectedEndOfInput;
+ const slice = self.takeValueSliceMinusTrailingOffset(switch (self.state) {
+ // Don't include the escape sequence in the partial string.
+ .string_backslash => 1,
+ .string_backslash_u => 2,
+ .string_backslash_u_1 => 3,
+ .string_backslash_u_2 => 4,
+ .string_backslash_u_3 => 5,
+ .string_surrogate_half => 6,
+ .string_surrogate_half_backslash => 7,
+ .string_surrogate_half_backslash_u => 8,
+ .string_surrogate_half_backslash_u_1 => 9,
+ .string_surrogate_half_backslash_u_2 => 10,
+ .string_surrogate_half_backslash_u_3 => 11,
+
+ // Include everything up to the cursor otherwise.
+ .string,
+ .string_utf8_last_byte,
+ .string_utf8_second_to_last_byte,
+ .string_utf8_second_to_last_byte_guard_against_overlong,
+ .string_utf8_second_to_last_byte_guard_against_surrogate_half,
+ .string_utf8_third_to_last_byte,
+ .string_utf8_third_to_last_byte_guard_against_overlong,
+ .string_utf8_third_to_last_byte_guard_against_too_large,
+ => 0,
+
+ else => unreachable,
+ });
+ if (slice.len == 0) return error.BufferUnderrun;
+ return Token{ .partial_string = slice };
+}
+
+fn partialStringCodepoint(code_point: u21) Token {
+ var buf: [4]u8 = undefined;
+ switch (std.unicode.utf8Encode(code_point, &buf) catch unreachable) {
+ 1 => return Token{ .partial_string_escaped_1 = buf[0..1].* },
+ 2 => return Token{ .partial_string_escaped_2 = buf[0..2].* },
+ 3 => return Token{ .partial_string_escaped_3 = buf[0..3].* },
+ 4 => return Token{ .partial_string_escaped_4 = buf[0..4].* },
+ else => unreachable,
+ }
+}
+
+/// Scan the input and check for malformed JSON.
+/// On `SyntaxError` or `UnexpectedEndOfInput`, returns `false`.
+/// Returns any errors from the allocator as-is, which is unlikely,
+/// but can be caused by extreme nesting depth in the input.
+pub fn validate(allocator: Allocator, s: []const u8) Allocator.Error!bool {
+ var scanner = Scanner.initCompleteInput(allocator, s);
+ defer scanner.deinit();
+
+ while (true) {
+ const token = scanner.next() catch |err| switch (err) {
+ error.SyntaxError, error.UnexpectedEndOfInput => return false,
+ error.OutOfMemory => return error.OutOfMemory,
+ error.BufferUnderrun => unreachable,
+ };
+ if (token == .end_of_document) break;
+ }
+
+ return true;
+}
+
+/// The parsing errors are divided into two categories:
+/// * `SyntaxError` is for clearly malformed JSON documents,
+/// such as giving an input document that isn't JSON at all.
+/// * `UnexpectedEndOfInput` is for signaling that everything's been
+/// valid so far, but the input appears to be truncated for some reason.
+/// Note that a completely empty (or whitespace-only) input will give `UnexpectedEndOfInput`.
+pub const Error = error{ SyntaxError, UnexpectedEndOfInput };
+
+/// Used by `json.reader`.
+pub const default_buffer_size = 0x1000;
+
+/// The tokens emitted by `std.json.Scanner` and `std.json.Reader` `.next*()` functions follow this grammar:
+/// ```
+/// <document> = <value> .end_of_document
+/// <value> =
+/// | <object>
+/// | <array>
+/// | <number>
+/// | <string>
+/// | .true
+/// | .false
+/// | .null
+/// <object> = .object_begin ( <string> <value> )* .object_end
+/// <array> = .array_begin ( <value> )* .array_end
+/// <number> = <It depends. See below.>
+/// <string> = <It depends. See below.>
+/// ```
+///
+/// What you get for `<number>` and `<string>` values depends on which `next*()` method you call:
+///
+/// ```
+/// next():
+/// <number> = ( .partial_number )* .number
+/// <string> = ( <partial_string> )* .string
+/// <partial_string> =
+/// | .partial_string
+/// | .partial_string_escaped_1
+/// | .partial_string_escaped_2
+/// | .partial_string_escaped_3
+/// | .partial_string_escaped_4
+///
+/// nextAlloc*(..., .alloc_always):
+/// <number> = .allocated_number
+/// <string> = .allocated_string
+///
+/// nextAlloc*(..., .alloc_if_needed):
+/// <number> =
+/// | .number
+/// | .allocated_number
+/// <string> =
+/// | .string
+/// | .allocated_string
+/// ```
+///
+/// For all tokens with a `[]const u8`, `[]u8`, or `[n]u8` payload, the payload represents the content of the value.
+/// For number values, this is the representation of the number exactly as it appears in the input.
+/// For strings, this is the content of the string after resolving escape sequences.
+///
+/// For `.allocated_number` and `.allocated_string`, the `[]u8` payloads are allocations made with the given allocator.
+/// You are responsible for managing that memory. `json.Reader.deinit()` does *not* free those allocations.
+///
+/// The `.partial_*` tokens indicate that a value spans multiple input buffers or that a string contains escape sequences.
+/// To get a complete value in memory, you need to concatenate the values yourself.
+/// Calling `nextAlloc*()` does this for you, and returns an `.allocated_*` token with the result.
+///
+/// For tokens with a `[]const u8` payload, the payload is a slice into the current input buffer.
+/// The memory may become undefined during the next call to `json.Scanner.feedInput()`
+/// or any `json.Reader` method whose return error set includes `json.Error`.
+/// To keep the value persistently, it is recommended to make a copy or to use `.alloc_always`,
+/// which makes a copy for you.
+///
+/// Note that `.number` and `.string` tokens that follow `.partial_*` tokens may have `0` length to indicate that
+/// the previously partial value is completed with no additional bytes.
+/// (This can happen when the break between input buffers happens to land on the exact end of a value. E.g. `"[1234"`, `"]"`.)
+/// `.partial_*` tokens never have `0` length.
+///
+/// The recommended strategy for using the different `next*()` methods is something like this:
+///
+/// When you're expecting an object key, use `.alloc_if_needed`.
+/// You often don't need a copy of the key string to persist; you might just check which field it is.
+/// In the case that the key happens to require an allocation, free it immediately after checking it.
+///
+/// When you're expecting a meaningful string value (such as on the right of a `:`),
+/// use `.alloc_always` in order to keep the value valid throughout parsing the rest of the document.
+///
+/// When you're expecting a number value, use `.alloc_if_needed`.
+/// You're probably going to be parsing the string representation of the number into a numeric representation,
+/// so you need the complete string representation only temporarily.
+///
+/// When you're skipping an unrecognized value, use `skipValue()`.
+pub const Token = union(enum) {
+ object_begin,
+ object_end,
+ array_begin,
+ array_end,
+
+ true,
+ false,
+ null,
+
+ number: []const u8,
+ partial_number: []const u8,
+ allocated_number: []u8,
+
+ string: []const u8,
+ partial_string: []const u8,
+ partial_string_escaped_1: [1]u8,
+ partial_string_escaped_2: [2]u8,
+ partial_string_escaped_3: [3]u8,
+ partial_string_escaped_4: [4]u8,
+ allocated_string: []u8,
+
+ end_of_document,
+};
+
+/// This is only used in `peekNextTokenType()` and gives a categorization based on the first byte of the next token that will be emitted from a `next*()` call.
+pub const TokenType = enum {
+ object_begin,
+ object_end,
+ array_begin,
+ array_end,
+ true,
+ false,
+ null,
+ number,
+ string,
+ end_of_document,
+};
+
+/// To enable diagnostics, declare `var diagnostics = Diagnostics{};` then call `source.enableDiagnostics(&diagnostics);`
+/// where `source` is either a `std.json.Reader` or a `std.json.Scanner` that has just been initialized.
+/// At any time, notably just after an error, call `getLine()`, `getColumn()`, and/or `getByteOffset()`
+/// to get meaningful information from this.
+pub const Diagnostics = struct {
+ line_number: u64 = 1,
+ line_start_cursor: usize = @as(usize, @bitCast(@as(isize, -1))), // Start just "before" the input buffer to get a 1-based column for line 1.
+ total_bytes_before_current_input: u64 = 0,
+ cursor_pointer: *const usize = undefined,
+
+ /// Starts at 1.
+ pub fn getLine(self: *const @This()) u64 {
+ return self.line_number;
+ }
+ /// Starts at 1.
+ pub fn getColumn(self: *const @This()) u64 {
+ return self.cursor_pointer.* -% self.line_start_cursor;
+ }
+ /// Starts at 0. Measures the byte offset since the start of the input.
+ pub fn getByteOffset(self: *const @This()) u64 {
+ return self.total_bytes_before_current_input + self.cursor_pointer.*;
+ }
+};
+
+/// See the documentation for `std.json.Token`.
+pub const AllocWhen = enum { alloc_if_needed, alloc_always };
+
+/// For security, the maximum size allocated to store a single string or number value is limited to 4MiB by default.
+/// This limit can be specified by calling `nextAllocMax()` instead of `nextAlloc()`.
+pub const default_max_value_len = 4 * 1024 * 1024;
+
+/// All `next*()` methods here handle `error.BufferUnderrun` from `std.json.Scanner`, and then read from the reader.
+pub const Reader = struct {
+ scanner: Scanner,
+ reader: *std.Io.Reader,
+
+ /// The allocator is only used to track `[]` and `{}` nesting levels.
+ pub fn init(allocator: Allocator, io_reader: *std.Io.Reader) @This() {
+ return .{
+ .scanner = Scanner.initStreaming(allocator),
+ .reader = io_reader,
+ };
+ }
+ pub fn deinit(self: *@This()) void {
+ self.scanner.deinit();
+ self.* = undefined;
+ }
+
+ /// Calls `std.json.Scanner.enableDiagnostics`.
+ pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void {
+ self.scanner.enableDiagnostics(diagnostics);
+ }
+
+ pub const NextError = std.Io.Reader.Error || Error || Allocator.Error;
+ pub const SkipError = Reader.NextError;
+ pub const AllocError = Reader.NextError || error{ValueTooLong};
+ pub const PeekError = std.Io.Reader.Error || Error;
+
+ /// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);`
+ /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
+ pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) Reader.AllocError!Token {
+ return self.nextAllocMax(allocator, when, default_max_value_len);
+ }
+ /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
+ pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) Reader.AllocError!Token {
+ const token_type = try self.peekNextTokenType();
+ switch (token_type) {
+ .number, .string => {
+ var value_list = ArrayList(u8).init(allocator);
+ errdefer {
+ value_list.deinit();
+ }
+ if (try self.allocNextIntoArrayListMax(&value_list, when, max_value_len)) |slice| {
+ return if (token_type == .number)
+ Token{ .number = slice }
+ else
+ Token{ .string = slice };
+ } else {
+ return if (token_type == .number)
+ Token{ .allocated_number = try value_list.toOwnedSlice() }
+ else
+ Token{ .allocated_string = try value_list.toOwnedSlice() };
+ }
+ },
+
+ // Simple tokens never alloc.
+ .object_begin,
+ .object_end,
+ .array_begin,
+ .array_end,
+ .true,
+ .false,
+ .null,
+ .end_of_document,
+ => return try self.next(),
+ }
+ }
+
+ /// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);`
+ pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) Reader.AllocError!?[]const u8 {
+ return self.allocNextIntoArrayListMax(value_list, when, default_max_value_len);
+ }
+ /// Calls `std.json.Scanner.allocNextIntoArrayListMax` and handles `error.BufferUnderrun`.
+ pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) Reader.AllocError!?[]const u8 {
+ while (true) {
+ return self.scanner.allocNextIntoArrayListMax(value_list, when, max_value_len) catch |err| switch (err) {
+ error.BufferUnderrun => {
+ try self.refillBuffer();
+ continue;
+ },
+ else => |other_err| return other_err,
+ };
+ }
+ }
+
+ /// Like `std.json.Scanner.skipValue`, but handles `error.BufferUnderrun`.
+ pub fn skipValue(self: *@This()) Reader.SkipError!void {
+ switch (try self.peekNextTokenType()) {
+ .object_begin, .array_begin => {
+ try self.skipUntilStackHeight(self.stackHeight());
+ },
+ .number, .string => {
+ while (true) {
+ switch (try self.next()) {
+ .partial_number,
+ .partial_string,
+ .partial_string_escaped_1,
+ .partial_string_escaped_2,
+ .partial_string_escaped_3,
+ .partial_string_escaped_4,
+ => continue,
+
+ .number, .string => break,
+
+ else => unreachable,
+ }
+ }
+ },
+ .true, .false, .null => {
+ _ = try self.next();
+ },
+
+ .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token.
+ }
+ }
+ /// Like `std.json.Scanner.skipUntilStackHeight()` but handles `error.BufferUnderrun`.
+ pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: usize) Reader.NextError!void {
+ while (true) {
+ return self.scanner.skipUntilStackHeight(terminal_stack_height) catch |err| switch (err) {
+ error.BufferUnderrun => {
+ try self.refillBuffer();
+ continue;
+ },
+ else => |other_err| return other_err,
+ };
+ }
+ }
+
+ /// Calls `std.json.Scanner.stackHeight`.
+ pub fn stackHeight(self: *const @This()) usize {
+ return self.scanner.stackHeight();
+ }
+ /// Calls `std.json.Scanner.ensureTotalStackCapacity`.
+ pub fn ensureTotalStackCapacity(self: *@This(), height: usize) Allocator.Error!void {
+ try self.scanner.ensureTotalStackCapacity(height);
+ }
+
+ /// See `std.json.Token` for documentation of this function.
+ pub fn next(self: *@This()) Reader.NextError!Token {
+ while (true) {
+ return self.scanner.next() catch |err| switch (err) {
+ error.BufferUnderrun => {
+ try self.refillBuffer();
+ continue;
+ },
+ else => |other_err| return other_err,
+ };
+ }
+ }
+
+ /// See `std.json.Scanner.peekNextTokenType()`.
+ pub fn peekNextTokenType(self: *@This()) Reader.PeekError!TokenType {
+ while (true) {
+ return self.scanner.peekNextTokenType() catch |err| switch (err) {
+ error.BufferUnderrun => {
+ try self.refillBuffer();
+ continue;
+ },
+ else => |other_err| return other_err,
+ };
+ }
+ }
+
+ fn refillBuffer(self: *@This()) std.Io.Reader.Error!void {
+ const input = self.reader.peekGreedy(1) catch |err| switch (err) {
+ error.ReadFailed => return error.ReadFailed,
+ error.EndOfStream => return self.scanner.endInput(),
+ };
+ self.reader.toss(input.len);
+ self.scanner.feedInput(input);
+ }
+};
+
+const OBJECT_MODE = 0;
+const ARRAY_MODE = 1;
+
+fn appendSlice(list: *std.ArrayList(u8), buf: []const u8, max_value_len: usize) !void {
+ const new_len = std.math.add(usize, list.items.len, buf.len) catch return error.ValueTooLong;
+ if (new_len > max_value_len) return error.ValueTooLong;
+ try list.appendSlice(buf);
+}
+
+/// For the slice you get from a `Token.number` or `Token.allocated_number`,
+/// this function returns true if the number doesn't contain any fraction or exponent components, and is not `-0`.
+/// Note, the numeric value encoded by the value may still be an integer, such as `1.0`.
+/// This function is meant to give a hint about whether integer parsing or float parsing should be used on the value.
+/// This function will not give meaningful results on non-numeric input.
+pub fn isNumberFormattedLikeAnInteger(value: []const u8) bool {
+ if (std.mem.eql(u8, value, "-0")) return false;
+ return std.mem.indexOfAny(u8, value, ".eE") == null;
+}
+
+test {
+ _ = @import("./scanner_test.zig");
+}
lib/std/json/scanner_test.zig
@@ -1,13 +1,11 @@
const std = @import("std");
-const JsonScanner = @import("./scanner.zig").Scanner;
-const jsonReader = @import("./scanner.zig").reader;
-const JsonReader = @import("./scanner.zig").Reader;
-const Token = @import("./scanner.zig").Token;
-const TokenType = @import("./scanner.zig").TokenType;
-const Diagnostics = @import("./scanner.zig").Diagnostics;
-const Error = @import("./scanner.zig").Error;
-const validate = @import("./scanner.zig").validate;
-const isNumberFormattedLikeAnInteger = @import("./scanner.zig").isNumberFormattedLikeAnInteger;
+const Scanner = @import("Scanner.zig");
+const Token = Scanner.Token;
+const TokenType = Scanner.TokenType;
+const Diagnostics = Scanner.Diagnostics;
+const Error = Scanner.Error;
+const validate = Scanner.validate;
+const isNumberFormattedLikeAnInteger = Scanner.isNumberFormattedLikeAnInteger;
const example_document_str =
\\{
@@ -36,7 +34,7 @@ fn expectPeekNext(scanner_or_reader: anytype, expected_token_type: TokenType, ex
}
test "token" {
- var scanner = JsonScanner.initCompleteInput(std.testing.allocator, example_document_str);
+ var scanner = Scanner.initCompleteInput(std.testing.allocator, example_document_str);
defer scanner.deinit();
try expectNext(&scanner, .object_begin);
@@ -138,23 +136,25 @@ fn testAllTypes(source: anytype, large_buffer: bool) !void {
}
test "peek all types" {
- var scanner = JsonScanner.initCompleteInput(std.testing.allocator, all_types_test_case);
+ var scanner = Scanner.initCompleteInput(std.testing.allocator, all_types_test_case);
defer scanner.deinit();
try testAllTypes(&scanner, true);
- var stream = std.io.fixedBufferStream(all_types_test_case);
- var json_reader = jsonReader(std.testing.allocator, stream.reader());
+ var stream: std.Io.Reader = .fixed(all_types_test_case);
+ var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream);
defer json_reader.deinit();
try testAllTypes(&json_reader, true);
- var tiny_stream = std.io.fixedBufferStream(all_types_test_case);
- var tiny_json_reader = JsonReader(1, @TypeOf(tiny_stream.reader())).init(std.testing.allocator, tiny_stream.reader());
+ var tiny_buffer: [1]u8 = undefined;
+ var tiny_stream: std.testing.Reader = .init(&tiny_buffer, &.{.{ .buffer = all_types_test_case }});
+ tiny_stream.artificial_limit = .limited(1);
+ var tiny_json_reader: Scanner.Reader = .init(std.testing.allocator, &tiny_stream.interface);
defer tiny_json_reader.deinit();
try testAllTypes(&tiny_json_reader, false);
}
test "token mismatched close" {
- var scanner = JsonScanner.initCompleteInput(std.testing.allocator, "[102, 111, 111 }");
+ var scanner = Scanner.initCompleteInput(std.testing.allocator, "[102, 111, 111 }");
defer scanner.deinit();
try expectNext(&scanner, .array_begin);
try expectNext(&scanner, Token{ .number = "102" });
@@ -164,15 +164,15 @@ test "token mismatched close" {
}
test "token premature object close" {
- var scanner = JsonScanner.initCompleteInput(std.testing.allocator, "{ \"key\": }");
+ var scanner = Scanner.initCompleteInput(std.testing.allocator, "{ \"key\": }");
defer scanner.deinit();
try expectNext(&scanner, .object_begin);
try expectNext(&scanner, Token{ .string = "key" });
try std.testing.expectError(error.SyntaxError, scanner.next());
}
-test "JsonScanner basic" {
- var scanner = JsonScanner.initCompleteInput(std.testing.allocator, example_document_str);
+test "Scanner basic" {
+ var scanner = Scanner.initCompleteInput(std.testing.allocator, example_document_str);
defer scanner.deinit();
while (true) {
@@ -181,10 +181,10 @@ test "JsonScanner basic" {
}
}
-test "JsonReader basic" {
- var stream = std.io.fixedBufferStream(example_document_str);
+test "Scanner.Reader basic" {
+ var stream: std.Io.Reader = .fixed(example_document_str);
- var json_reader = jsonReader(std.testing.allocator, stream.reader());
+ var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream);
defer json_reader.deinit();
while (true) {
@@ -215,7 +215,7 @@ const number_test_items = blk: {
test "numbers" {
for (number_test_items) |number_str| {
- var scanner = JsonScanner.initCompleteInput(std.testing.allocator, number_str);
+ var scanner = Scanner.initCompleteInput(std.testing.allocator, number_str);
defer scanner.deinit();
const token = try scanner.next();
@@ -243,10 +243,10 @@ const string_test_cases = .{
test "strings" {
inline for (string_test_cases) |tuple| {
- var stream = std.io.fixedBufferStream("\"" ++ tuple[0] ++ "\"");
+ var stream: std.Io.Reader = .fixed("\"" ++ tuple[0] ++ "\"");
var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
defer arena.deinit();
- var json_reader = jsonReader(std.testing.allocator, stream.reader());
+ var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream);
defer json_reader.deinit();
const token = try json_reader.nextAlloc(arena.allocator(), .alloc_if_needed);
@@ -289,7 +289,7 @@ test "nesting" {
}
fn expectMaybeError(document_str: []const u8, maybe_error: ?Error) !void {
- var scanner = JsonScanner.initCompleteInput(std.testing.allocator, document_str);
+ var scanner = Scanner.initCompleteInput(std.testing.allocator, document_str);
defer scanner.deinit();
while (true) {
@@ -352,12 +352,12 @@ fn expectEqualTokens(expected_token: Token, actual_token: Token) !void {
}
fn testTinyBufferSize(document_str: []const u8) !void {
- var tiny_stream = std.io.fixedBufferStream(document_str);
- var normal_stream = std.io.fixedBufferStream(document_str);
+ var tiny_stream: std.Io.Reader = .fixed(document_str);
+ var normal_stream: std.Io.Reader = .fixed(document_str);
- var tiny_json_reader = JsonReader(1, @TypeOf(tiny_stream.reader())).init(std.testing.allocator, tiny_stream.reader());
+ var tiny_json_reader: Scanner.Reader = .init(std.testing.allocator, &tiny_stream);
defer tiny_json_reader.deinit();
- var normal_json_reader = JsonReader(0x1000, @TypeOf(normal_stream.reader())).init(std.testing.allocator, normal_stream.reader());
+ var normal_json_reader: Scanner.Reader = .init(std.testing.allocator, &normal_stream);
defer normal_json_reader.deinit();
expectEqualStreamOfTokens(&normal_json_reader, &tiny_json_reader) catch |err| {
@@ -397,13 +397,13 @@ test "validate" {
}
fn testSkipValue(s: []const u8) !void {
- var scanner = JsonScanner.initCompleteInput(std.testing.allocator, s);
+ var scanner = Scanner.initCompleteInput(std.testing.allocator, s);
defer scanner.deinit();
try scanner.skipValue();
try expectEqualTokens(.end_of_document, try scanner.next());
- var stream = std.io.fixedBufferStream(s);
- var json_reader = jsonReader(std.testing.allocator, stream.reader());
+ var stream: std.Io.Reader = .fixed(s);
+ var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream);
defer json_reader.deinit();
try json_reader.skipValue();
try expectEqualTokens(.end_of_document, try json_reader.next());
@@ -441,7 +441,7 @@ fn testEnsureStackCapacity(do_ensure: bool) !void {
try input_string.appendNTimes(std.testing.allocator, ']', nestings);
defer input_string.deinit(std.testing.allocator);
- var scanner = JsonScanner.initCompleteInput(failing_allocator, input_string.items);
+ var scanner = Scanner.initCompleteInput(failing_allocator, input_string.items);
defer scanner.deinit();
if (do_ensure) {
@@ -473,17 +473,17 @@ fn testDiagnosticsFromSource(expected_error: ?anyerror, line: u64, col: u64, byt
try std.testing.expectEqual(byte_offset, diagnostics.getByteOffset());
}
fn testDiagnostics(expected_error: ?anyerror, line: u64, col: u64, byte_offset: u64, s: []const u8) !void {
- var scanner = JsonScanner.initCompleteInput(std.testing.allocator, s);
+ var scanner = Scanner.initCompleteInput(std.testing.allocator, s);
defer scanner.deinit();
try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &scanner);
- var tiny_stream = std.io.fixedBufferStream(s);
- var tiny_json_reader = JsonReader(1, @TypeOf(tiny_stream.reader())).init(std.testing.allocator, tiny_stream.reader());
+ var tiny_stream: std.Io.Reader = .fixed(s);
+ var tiny_json_reader: Scanner.Reader = .init(std.testing.allocator, &tiny_stream);
defer tiny_json_reader.deinit();
try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &tiny_json_reader);
- var medium_stream = std.io.fixedBufferStream(s);
- var medium_json_reader = JsonReader(5, @TypeOf(medium_stream.reader())).init(std.testing.allocator, medium_stream.reader());
+ var medium_stream: std.Io.Reader = .fixed(s);
+ var medium_json_reader: Scanner.Reader = .init(std.testing.allocator, &medium_stream);
defer medium_json_reader.deinit();
try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &medium_json_reader);
}
lib/std/json/static.zig
@@ -4,11 +4,11 @@ const Allocator = std.mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;
const ArrayList = std.ArrayList;
-const Scanner = @import("./scanner.zig").Scanner;
-const Token = @import("./scanner.zig").Token;
-const AllocWhen = @import("./scanner.zig").AllocWhen;
-const default_max_value_len = @import("./scanner.zig").default_max_value_len;
-const isNumberFormattedLikeAnInteger = @import("./scanner.zig").isNumberFormattedLikeAnInteger;
+const Scanner = @import("Scanner.zig");
+const Token = Scanner.Token;
+const AllocWhen = Scanner.AllocWhen;
+const default_max_value_len = Scanner.default_max_value_len;
+const isNumberFormattedLikeAnInteger = Scanner.isNumberFormattedLikeAnInteger;
const Value = @import("./dynamic.zig").Value;
const Array = @import("./dynamic.zig").Array;
lib/std/json/static_test.zig
@@ -12,9 +12,7 @@ const parseFromValue = @import("./static.zig").parseFromValue;
const parseFromValueLeaky = @import("./static.zig").parseFromValueLeaky;
const ParseOptions = @import("./static.zig").ParseOptions;
-const JsonScanner = @import("./scanner.zig").Scanner;
-const jsonReader = @import("./scanner.zig").reader;
-const Diagnostics = @import("./scanner.zig").Diagnostics;
+const Scanner = @import("Scanner.zig");
const Value = @import("./dynamic.zig").Value;
@@ -300,9 +298,9 @@ const subnamespaces_0_doc =
fn testAllParseFunctions(comptime T: type, expected: T, doc: []const u8) !void {
// First do the one with the debug info in case we get a SyntaxError or something.
{
- var scanner = JsonScanner.initCompleteInput(testing.allocator, doc);
+ var scanner = Scanner.initCompleteInput(testing.allocator, doc);
defer scanner.deinit();
- var diagnostics = Diagnostics{};
+ var diagnostics = Scanner.Diagnostics{};
scanner.enableDiagnostics(&diagnostics);
var parsed = parseFromTokenSource(T, testing.allocator, &scanner, .{}) catch |e| {
std.debug.print("at line,col: {}:{}\n", .{ diagnostics.getLine(), diagnostics.getColumn() });
@@ -317,8 +315,8 @@ fn testAllParseFunctions(comptime T: type, expected: T, doc: []const u8) !void {
try testing.expectEqualDeep(expected, parsed.value);
}
{
- var stream = std.io.fixedBufferStream(doc);
- var json_reader = jsonReader(std.testing.allocator, stream.reader());
+ var stream: std.Io.Reader = .fixed(doc);
+ var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream);
defer json_reader.deinit();
var parsed = try parseFromTokenSource(T, testing.allocator, &json_reader, .{});
defer parsed.deinit();
@@ -331,13 +329,13 @@ fn testAllParseFunctions(comptime T: type, expected: T, doc: []const u8) !void {
try testing.expectEqualDeep(expected, try parseFromSliceLeaky(T, arena.allocator(), doc, .{}));
}
{
- var scanner = JsonScanner.initCompleteInput(testing.allocator, doc);
+ var scanner = Scanner.initCompleteInput(testing.allocator, doc);
defer scanner.deinit();
try testing.expectEqualDeep(expected, try parseFromTokenSourceLeaky(T, arena.allocator(), &scanner, .{}));
}
{
- var stream = std.io.fixedBufferStream(doc);
- var json_reader = jsonReader(std.testing.allocator, stream.reader());
+ var stream: std.Io.Reader = .fixed(doc);
+ var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream);
defer json_reader.deinit();
try testing.expectEqualDeep(expected, try parseFromTokenSourceLeaky(T, arena.allocator(), &json_reader, .{}));
}
@@ -763,7 +761,7 @@ test "parse exponential into int" {
test "parseFromTokenSource" {
{
- var scanner = JsonScanner.initCompleteInput(testing.allocator, "123");
+ var scanner = Scanner.initCompleteInput(testing.allocator, "123");
defer scanner.deinit();
var parsed = try parseFromTokenSource(u32, testing.allocator, &scanner, .{});
defer parsed.deinit();
@@ -771,8 +769,8 @@ test "parseFromTokenSource" {
}
{
- var stream = std.io.fixedBufferStream("123");
- var json_reader = jsonReader(std.testing.allocator, stream.reader());
+ var stream: std.Io.Reader = .fixed("123");
+ var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream);
defer json_reader.deinit();
var parsed = try parseFromTokenSource(u32, testing.allocator, &json_reader, .{});
defer parsed.deinit();
@@ -836,7 +834,7 @@ test "json parse partial" {
\\}
;
const allocator = testing.allocator;
- var scanner = JsonScanner.initCompleteInput(allocator, str);
+ var scanner = Scanner.initCompleteInput(allocator, str);
defer scanner.deinit();
var arena = ArenaAllocator.init(allocator);
@@ -886,8 +884,8 @@ test "json parse allocate when streaming" {
var arena = ArenaAllocator.init(allocator);
defer arena.deinit();
- var stream = std.io.fixedBufferStream(str);
- var json_reader = jsonReader(std.testing.allocator, stream.reader());
+ var stream: std.Io.Reader = .fixed(str);
+ var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream);
const parsed = parseFromTokenSourceLeaky(T, arena.allocator(), &json_reader, .{}) catch |err| {
json_reader.deinit();
lib/std/json/Stringify.zig
@@ -0,0 +1,999 @@
+//! Writes JSON ([RFC8259](https://tools.ietf.org/html/rfc8259)) formatted data
+//! to a stream.
+//!
+//! The sequence of method calls to write JSON content must follow this grammar:
+//! ```
+//! <once> = <value>
+//! <value> =
+//! | <object>
+//! | <array>
+//! | write
+//! | print
+//! | <writeRawStream>
+//! <object> = beginObject ( <field> <value> )* endObject
+//! <field> = objectField | objectFieldRaw | <objectFieldRawStream>
+//! <array> = beginArray ( <value> )* endArray
+//! <writeRawStream> = beginWriteRaw ( stream.writeAll )* endWriteRaw
+//! <objectFieldRawStream> = beginObjectFieldRaw ( stream.writeAll )* endObjectFieldRaw
+//! ```
+
+const std = @import("../std.zig");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const ArrayList = std.ArrayList;
+const BitStack = std.BitStack;
+const Stringify = @This();
+const Writer = std.io.Writer;
+
+const IndentationMode = enum(u1) {
+ object = 0,
+ array = 1,
+};
+
+writer: *Writer,
+options: Options = .{},
+indent_level: usize = 0,
+next_punctuation: enum {
+ the_beginning,
+ none,
+ comma,
+ colon,
+} = .the_beginning,
+
+nesting_stack: switch (safety_checks) {
+ .checked_to_fixed_depth => |fixed_buffer_size| [(fixed_buffer_size + 7) >> 3]u8,
+ .assumed_correct => void,
+} = switch (safety_checks) {
+ .checked_to_fixed_depth => @splat(0),
+ .assumed_correct => {},
+},
+
+raw_streaming_mode: if (build_mode_has_safety)
+ enum { none, value, objectField }
+else
+ void = if (build_mode_has_safety) .none else {},
+
+const build_mode_has_safety = switch (@import("builtin").mode) {
+ .Debug, .ReleaseSafe => true,
+ .ReleaseFast, .ReleaseSmall => false,
+};
+
+/// The `safety_checks_hint` parameter determines how much memory is used to enable assertions that the above grammar is being followed,
+/// e.g. tripping an assertion rather than allowing `endObject` to emit the final `}` in `[[[]]}`.
+/// "Depth" in this context means the depth of nested `[]` or `{}` expressions
+/// (or equivalently the amount of recursion on the `<value>` grammar expression above).
+/// For example, emitting the JSON `[[[]]]` requires a depth of 3.
+/// If `.checked_to_fixed_depth` is used, there is additionally an assertion that the nesting depth never exceeds the given limit.
+/// `.checked_to_fixed_depth` embeds the storage required in the `Stringify` struct.
+/// `.assumed_correct` requires no space and performs none of these assertions.
+/// In `ReleaseFast` and `ReleaseSmall` mode, the given `safety_checks_hint` is ignored and is always treated as `.assumed_correct`.
+const safety_checks_hint: union(enum) {
+ /// Rounded up to the nearest multiple of 8.
+ checked_to_fixed_depth: usize,
+ assumed_correct,
+} = .{ .checked_to_fixed_depth = 256 };
+
+const safety_checks: @TypeOf(safety_checks_hint) = if (build_mode_has_safety)
+ safety_checks_hint
+else
+ .assumed_correct;
+
+pub const Error = Writer.Error;
+
+pub fn beginArray(self: *Stringify) Error!void {
+ if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
+ try self.valueStart();
+ try self.writer.writeByte('[');
+ try self.pushIndentation(.array);
+ self.next_punctuation = .none;
+}
+
+pub fn beginObject(self: *Stringify) Error!void {
+ if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
+ try self.valueStart();
+ try self.writer.writeByte('{');
+ try self.pushIndentation(.object);
+ self.next_punctuation = .none;
+}
+
+pub fn endArray(self: *Stringify) Error!void {
+ if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
+ self.popIndentation(.array);
+ switch (self.next_punctuation) {
+ .none => {},
+ .comma => {
+ try self.indent();
+ },
+ .the_beginning, .colon => unreachable,
+ }
+ try self.writer.writeByte(']');
+ self.valueDone();
+}
+
+pub fn endObject(self: *Stringify) Error!void {
+ if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
+ self.popIndentation(.object);
+ switch (self.next_punctuation) {
+ .none => {},
+ .comma => {
+ try self.indent();
+ },
+ .the_beginning, .colon => unreachable,
+ }
+ try self.writer.writeByte('}');
+ self.valueDone();
+}
+
+fn pushIndentation(self: *Stringify, mode: IndentationMode) !void {
+ switch (safety_checks) {
+ .checked_to_fixed_depth => {
+ BitStack.pushWithStateAssumeCapacity(&self.nesting_stack, &self.indent_level, @intFromEnum(mode));
+ },
+ .assumed_correct => {
+ self.indent_level += 1;
+ },
+ }
+}
+fn popIndentation(self: *Stringify, expected_mode: IndentationMode) void {
+ switch (safety_checks) {
+ .checked_to_fixed_depth => {
+ assert(BitStack.popWithState(&self.nesting_stack, &self.indent_level) == @intFromEnum(expected_mode));
+ },
+ .assumed_correct => {
+ self.indent_level -= 1;
+ },
+ }
+}
+
+fn indent(self: *Stringify) !void {
+ var char: u8 = ' ';
+ const n_chars = switch (self.options.whitespace) {
+ .minified => return,
+ .indent_1 => 1 * self.indent_level,
+ .indent_2 => 2 * self.indent_level,
+ .indent_3 => 3 * self.indent_level,
+ .indent_4 => 4 * self.indent_level,
+ .indent_8 => 8 * self.indent_level,
+ .indent_tab => blk: {
+ char = '\t';
+ break :blk self.indent_level;
+ },
+ };
+ try self.writer.writeByte('\n');
+ try self.writer.splatByteAll(char, n_chars);
+}
+
+fn valueStart(self: *Stringify) !void {
+ if (self.isObjectKeyExpected()) |is_it| assert(!is_it); // Call objectField*(), not write(), for object keys.
+ return self.valueStartAssumeTypeOk();
+}
+fn objectFieldStart(self: *Stringify) !void {
+ if (self.isObjectKeyExpected()) |is_it| assert(is_it); // Expected write(), not objectField*().
+ return self.valueStartAssumeTypeOk();
+}
+fn valueStartAssumeTypeOk(self: *Stringify) !void {
+ assert(!self.isComplete()); // JSON document already complete.
+ switch (self.next_punctuation) {
+ .the_beginning => {
+ // No indentation for the very beginning.
+ },
+ .none => {
+ // First item in a container.
+ try self.indent();
+ },
+ .comma => {
+ // Subsequent item in a container.
+ try self.writer.writeByte(',');
+ try self.indent();
+ },
+ .colon => {
+ try self.writer.writeByte(':');
+ if (self.options.whitespace != .minified) {
+ try self.writer.writeByte(' ');
+ }
+ },
+ }
+}
+fn valueDone(self: *Stringify) void {
+ self.next_punctuation = .comma;
+}
+
+// Only when safety is enabled:
+fn isObjectKeyExpected(self: *const Stringify) ?bool {
+ switch (safety_checks) {
+ .checked_to_fixed_depth => return self.indent_level > 0 and
+ BitStack.peekWithState(&self.nesting_stack, self.indent_level) == @intFromEnum(IndentationMode.object) and
+ self.next_punctuation != .colon,
+ .assumed_correct => return null,
+ }
+}
+fn isComplete(self: *const Stringify) bool {
+ return self.indent_level == 0 and self.next_punctuation == .comma;
+}
+
+/// An alternative to calling `write` that formats a value with `std.fmt`.
+/// This function does the usual punctuation and indentation formatting
+/// assuming the resulting formatted string represents a single complete value;
+/// e.g. `"1"`, `"[]"`, `"[1,2]"`, not `"1,2"`.
+/// This function may be useful for doing your own number formatting.
+pub fn print(self: *Stringify, comptime fmt: []const u8, args: anytype) Error!void {
+ if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
+ try self.valueStart();
+ try self.writer.print(fmt, args);
+ self.valueDone();
+}
+
+test print {
+ var out_buf: [1024]u8 = undefined;
+ var out: Writer = .fixed(&out_buf);
+
+ var w: Stringify = .{ .writer = &out, .options = .{ .whitespace = .indent_2 } };
+
+ try w.beginObject();
+ try w.objectField("a");
+ try w.print("[ ]", .{});
+ try w.objectField("b");
+ try w.beginArray();
+ try w.print("[{s}] ", .{"[]"});
+ try w.print(" {}", .{12345});
+ try w.endArray();
+ try w.endObject();
+
+ const expected =
+ \\{
+ \\ "a": [ ],
+ \\ "b": [
+ \\ [[]] ,
+ \\ 12345
+ \\ ]
+ \\}
+ ;
+ try std.testing.expectEqualStrings(expected, out.buffered());
+}
+
+/// An alternative to calling `write` that allows you to write directly to the `.writer` field, e.g. with `.writer.writeAll()`.
+/// Call `beginWriteRaw()`, then write a complete value (including any quotes if necessary) directly to the `.writer` field,
+/// then call `endWriteRaw()`.
+/// This can be useful for streaming very long strings into the output without needing it all buffered in memory.
+pub fn beginWriteRaw(self: *Stringify) !void {
+ if (build_mode_has_safety) {
+ assert(self.raw_streaming_mode == .none);
+ self.raw_streaming_mode = .value;
+ }
+ try self.valueStart();
+}
+
+/// See `beginWriteRaw`.
+pub fn endWriteRaw(self: *Stringify) void {
+ if (build_mode_has_safety) {
+ assert(self.raw_streaming_mode == .value);
+ self.raw_streaming_mode = .none;
+ }
+ self.valueDone();
+}
+
+/// See `Stringify` for when to call this method.
+/// `key` is the string content of the property name.
+/// Surrounding quotes will be added and any special characters will be escaped.
+/// See also `objectFieldRaw`.
+pub fn objectField(self: *Stringify, key: []const u8) Error!void {
+ if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
+ try self.objectFieldStart();
+ try encodeJsonString(key, self.options, self.writer);
+ self.next_punctuation = .colon;
+}
+/// See `Stringify` for when to call this method.
+/// `quoted_key` is the complete bytes of the key including quotes and any necessary escape sequences.
+/// A few assertions are performed on the given value to ensure that the caller of this function understands the API contract.
+/// See also `objectField`.
+pub fn objectFieldRaw(self: *Stringify, quoted_key: []const u8) Error!void {
+ if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
+ assert(quoted_key.len >= 2 and quoted_key[0] == '"' and quoted_key[quoted_key.len - 1] == '"'); // quoted_key should be "quoted".
+ try self.objectFieldStart();
+ try self.writer.writeAll(quoted_key);
+ self.next_punctuation = .colon;
+}
+
+/// In the rare case that you need to write very long object field names,
+/// this is an alternative to `objectField` and `objectFieldRaw` that allows you to write directly to the `.writer` field
+/// similar to `beginWriteRaw`.
+/// Call `endObjectFieldRaw()` when you're done.
+pub fn beginObjectFieldRaw(self: *Stringify) !void {
+ if (build_mode_has_safety) {
+ assert(self.raw_streaming_mode == .none);
+ self.raw_streaming_mode = .objectField;
+ }
+ try self.objectFieldStart();
+}
+
+/// See `beginObjectFieldRaw`.
+pub fn endObjectFieldRaw(self: *Stringify) void {
+ if (build_mode_has_safety) {
+ assert(self.raw_streaming_mode == .objectField);
+ self.raw_streaming_mode = .none;
+ }
+ self.next_punctuation = .colon;
+}
+
+/// Renders the given Zig value as JSON.
+///
+/// Supported types:
+/// * Zig `bool` -> JSON `true` or `false`.
+/// * Zig `?T` -> `null` or the rendering of `T`.
+/// * Zig `i32`, `u64`, etc. -> JSON number or string.
+/// * When option `emit_nonportable_numbers_as_strings` is true, if the value is outside the range `+-1<<53` (the precise integer range of f64), it is rendered as a JSON string in base 10. Otherwise, it is rendered as JSON number.
+/// * Zig floats -> JSON number or string.
+/// * If the value cannot be precisely represented by an f64, it is rendered as a JSON string. Otherwise, it is rendered as JSON number.
+/// * TODO: Float rendering will likely change in the future, e.g. to remove the unnecessary "e+00".
+/// * Zig `[]const u8`, `[]u8`, `*[N]u8`, `@Vector(N, u8)`, and similar -> JSON string.
+/// * See `Options.emit_strings_as_arrays`.
+/// * If the content is not valid UTF-8, rendered as an array of numbers instead.
+/// * Zig `[]T`, `[N]T`, `*[N]T`, `@Vector(N, T)`, and similar -> JSON array of the rendering of each item.
+/// * Zig tuple -> JSON array of the rendering of each item.
+/// * Zig `struct` -> JSON object with each field in declaration order.
+/// * If the struct declares a method `pub fn jsonStringify(self: *@This(), jw: anytype) !void`, it is called to do the serialization instead of the default behavior. The given `jw` is a pointer to this `Stringify`. See `std.json.Value` for an example.
+/// * See `Options.emit_null_optional_fields`.
+/// * Zig `union(enum)` -> JSON object with one field named for the active tag and a value representing the payload.
+/// * If the payload is `void`, then the emitted value is `{}`.
+/// * If the union declares a method `pub fn jsonStringify(self: *@This(), jw: anytype) !void`, it is called to do the serialization instead of the default behavior. The given `jw` is a pointer to this `Stringify`.
+/// * Zig `enum` -> JSON string naming the active tag.
+/// * If the enum declares a method `pub fn jsonStringify(self: *@This(), jw: anytype) !void`, it is called to do the serialization instead of the default behavior. The given `jw` is a pointer to this `Stringify`.
+/// * If the enum is non-exhaustive, unnamed values are rendered as integers.
+/// * Zig untyped enum literal -> JSON string naming the active tag.
+/// * Zig error -> JSON string naming the error.
+/// * Zig `*T` -> the rendering of `T`. Note there is no guard against circular-reference infinite recursion.
+///
+/// See also alternative functions `print` and `beginWriteRaw`.
+/// For writing object field names, use `objectField` instead.
+pub fn write(self: *Stringify, v: anytype) Error!void {
+ if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
+ const T = @TypeOf(v);
+ switch (@typeInfo(T)) {
+ .int => {
+ try self.valueStart();
+ if (self.options.emit_nonportable_numbers_as_strings and
+ (v <= -(1 << 53) or v >= (1 << 53)))
+ {
+ try self.writer.print("\"{}\"", .{v});
+ } else {
+ try self.writer.print("{}", .{v});
+ }
+ self.valueDone();
+ return;
+ },
+ .comptime_int => {
+ return self.write(@as(std.math.IntFittingRange(v, v), v));
+ },
+ .float, .comptime_float => {
+ if (@as(f64, @floatCast(v)) == v) {
+ try self.valueStart();
+ try self.writer.print("{}", .{@as(f64, @floatCast(v))});
+ self.valueDone();
+ return;
+ }
+ try self.valueStart();
+ try self.writer.print("\"{}\"", .{v});
+ self.valueDone();
+ return;
+ },
+
+ .bool => {
+ try self.valueStart();
+ try self.writer.writeAll(if (v) "true" else "false");
+ self.valueDone();
+ return;
+ },
+ .null => {
+ try self.valueStart();
+ try self.writer.writeAll("null");
+ self.valueDone();
+ return;
+ },
+ .optional => {
+ if (v) |payload| {
+ return try self.write(payload);
+ } else {
+ return try self.write(null);
+ }
+ },
+ .@"enum" => |enum_info| {
+ if (std.meta.hasFn(T, "jsonStringify")) {
+ return v.jsonStringify(self);
+ }
+
+ if (!enum_info.is_exhaustive) {
+ inline for (enum_info.fields) |field| {
+ if (v == @field(T, field.name)) {
+ break;
+ }
+ } else {
+ return self.write(@intFromEnum(v));
+ }
+ }
+
+ return self.stringValue(@tagName(v));
+ },
+ .enum_literal => {
+ return self.stringValue(@tagName(v));
+ },
+ .@"union" => {
+ if (std.meta.hasFn(T, "jsonStringify")) {
+ return v.jsonStringify(self);
+ }
+
+ const info = @typeInfo(T).@"union";
+ if (info.tag_type) |UnionTagType| {
+ try self.beginObject();
+ inline for (info.fields) |u_field| {
+ if (v == @field(UnionTagType, u_field.name)) {
+ try self.objectField(u_field.name);
+ if (u_field.type == void) {
+ // void v is {}
+ try self.beginObject();
+ try self.endObject();
+ } else {
+ try self.write(@field(v, u_field.name));
+ }
+ break;
+ }
+ } else {
+ unreachable; // No active tag?
+ }
+ try self.endObject();
+ return;
+ } else {
+ @compileError("Unable to stringify untagged union '" ++ @typeName(T) ++ "'");
+ }
+ },
+ .@"struct" => |S| {
+ if (std.meta.hasFn(T, "jsonStringify")) {
+ return v.jsonStringify(self);
+ }
+
+ if (S.is_tuple) {
+ try self.beginArray();
+ } else {
+ try self.beginObject();
+ }
+ inline for (S.fields) |Field| {
+ // don't include void fields
+ if (Field.type == void) continue;
+
+ var emit_field = true;
+
+ // don't include optional fields that are null when emit_null_optional_fields is set to false
+ if (@typeInfo(Field.type) == .optional) {
+ if (self.options.emit_null_optional_fields == false) {
+ if (@field(v, Field.name) == null) {
+ emit_field = false;
+ }
+ }
+ }
+
+ if (emit_field) {
+ if (!S.is_tuple) {
+ try self.objectField(Field.name);
+ }
+ try self.write(@field(v, Field.name));
+ }
+ }
+ if (S.is_tuple) {
+ try self.endArray();
+ } else {
+ try self.endObject();
+ }
+ return;
+ },
+ .error_set => return self.stringValue(@errorName(v)),
+ .pointer => |ptr_info| switch (ptr_info.size) {
+ .one => switch (@typeInfo(ptr_info.child)) {
+ .array => {
+ // Coerce `*[N]T` to `[]const T`.
+ const Slice = []const std.meta.Elem(ptr_info.child);
+ return self.write(@as(Slice, v));
+ },
+ else => {
+ return self.write(v.*);
+ },
+ },
+ .many, .slice => {
+ if (ptr_info.size == .many and ptr_info.sentinel() == null)
+ @compileError("unable to stringify type '" ++ @typeName(T) ++ "' without sentinel");
+ const slice = if (ptr_info.size == .many) std.mem.span(v) else v;
+
+ if (ptr_info.child == u8) {
+ // This is a []const u8, or some similar Zig string.
+ if (!self.options.emit_strings_as_arrays and std.unicode.utf8ValidateSlice(slice)) {
+ return self.stringValue(slice);
+ }
+ }
+
+ try self.beginArray();
+ for (slice) |x| {
+ try self.write(x);
+ }
+ try self.endArray();
+ return;
+ },
+ else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"),
+ },
+ .array => {
+ // Coerce `[N]T` to `*const [N]T` (and then to `[]const T`).
+ return self.write(&v);
+ },
+ .vector => |info| {
+ const array: [info.len]info.child = v;
+ return self.write(&array);
+ },
+ else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"),
+ }
+ unreachable;
+}
+
+fn stringValue(self: *Stringify, s: []const u8) !void {
+ try self.valueStart();
+ try encodeJsonString(s, self.options, self.writer);
+ self.valueDone();
+}
+
+pub const Options = struct {
+ /// Controls the whitespace emitted.
+ /// The default `.minified` is a compact encoding with no whitespace between tokens.
+ /// Any setting other than `.minified` will use newlines, indentation, and a space after each ':'.
+ /// `.indent_1` means 1 space for each indentation level, `.indent_2` means 2 spaces, etc.
+ /// `.indent_tab` uses a tab for each indentation level.
+ whitespace: enum {
+ minified,
+ indent_1,
+ indent_2,
+ indent_3,
+ indent_4,
+ indent_8,
+ indent_tab,
+ } = .minified,
+
+ /// Should optional fields with null value be written?
+ emit_null_optional_fields: bool = true,
+
+ /// Arrays/slices of u8 are typically encoded as JSON strings.
+ /// This option emits them as arrays of numbers instead.
+ /// Does not affect calls to `objectField*()`.
+ emit_strings_as_arrays: bool = false,
+
+ /// Should unicode characters be escaped in strings?
+ escape_unicode: bool = false,
+
+ /// When true, renders numbers outside the range `+-1<<53` (the precise integer range of f64) as JSON strings in base 10.
+ emit_nonportable_numbers_as_strings: bool = false,
+};
+
+/// Writes the given value to the `Writer` writer.
+/// See `Stringify` for how the given value is serialized into JSON.
+/// The maximum nesting depth of the output JSON document is 256.
+pub fn value(v: anytype, options: Options, writer: *Writer) Error!void {
+ var s: Stringify = .{ .writer = writer, .options = options };
+ try s.write(v);
+}
+
+test value {
+ var out: std.io.Writer.Allocating = .init(std.testing.allocator);
+ const writer = &out.writer;
+ defer out.deinit();
+
+ const T = struct { a: i32, b: []const u8 };
+ try value(T{ .a = 123, .b = "xy" }, .{}, writer);
+ try std.testing.expectEqualSlices(u8, "{\"a\":123,\"b\":\"xy\"}", out.getWritten());
+
+ try testStringify("9999999999999999", 9999999999999999, .{});
+ try testStringify("\"9999999999999999\"", 9999999999999999, .{ .emit_nonportable_numbers_as_strings = true });
+
+ try testStringify("[1,1]", @as(@Vector(2, u32), @splat(1)), .{});
+ try testStringify("\"AA\"", @as(@Vector(2, u8), @splat('A')), .{});
+ try testStringify("[65,65]", @as(@Vector(2, u8), @splat('A')), .{ .emit_strings_as_arrays = true });
+
+ // void field
+ try testStringify("{\"foo\":42}", struct {
+ foo: u32,
+ bar: void = {},
+ }{ .foo = 42 }, .{});
+
+ const Tuple = struct { []const u8, usize };
+ try testStringify("[\"foo\",42]", Tuple{ "foo", 42 }, .{});
+
+ comptime {
+ testStringify("false", false, .{}) catch unreachable;
+ const MyStruct = struct { foo: u32 };
+ testStringify("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", [_]MyStruct{
+ MyStruct{ .foo = 42 },
+ MyStruct{ .foo = 100 },
+ MyStruct{ .foo = 1000 },
+ }, .{}) catch unreachable;
+ }
+}
+
+/// Calls `value` and stores the result in dynamically allocated memory instead
+/// of taking a writer.
+///
+/// Caller owns returned memory.
+pub fn valueAlloc(gpa: Allocator, v: anytype, options: Options) error{OutOfMemory}![]u8 {
+ var aw: std.io.Writer.Allocating = .init(gpa);
+ defer aw.deinit();
+ value(v, options, &aw.writer) catch return error.OutOfMemory;
+ return aw.toOwnedSlice();
+}
+
+test valueAlloc {
+ const allocator = std.testing.allocator;
+ const expected =
+ \\{"foo":"bar","answer":42,"my_friend":"sammy"}
+ ;
+ const actual = try valueAlloc(allocator, .{ .foo = "bar", .answer = 42, .my_friend = "sammy" }, .{});
+ defer allocator.free(actual);
+
+ try std.testing.expectEqualStrings(expected, actual);
+}
+
+fn outputUnicodeEscape(codepoint: u21, w: *Writer) Error!void {
+ if (codepoint <= 0xFFFF) {
+ // If the character is in the Basic Multilingual Plane (U+0000 through U+FFFF),
+ // then it may be represented as a six-character sequence: a reverse solidus, followed
+ // by the lowercase letter u, followed by four hexadecimal digits that encode the character's code point.
+ try w.writeAll("\\u");
+ try w.printInt(codepoint, 16, .lower, .{ .width = 4, .fill = '0' });
+ } else {
+ assert(codepoint <= 0x10FFFF);
+ // To escape an extended character that is not in the Basic Multilingual Plane,
+ // the character is represented as a 12-character sequence, encoding the UTF-16 surrogate pair.
+ const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800;
+ const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00;
+ try w.writeAll("\\u");
+ try w.printInt(high, 16, .lower, .{ .width = 4, .fill = '0' });
+ try w.writeAll("\\u");
+ try w.printInt(low, 16, .lower, .{ .width = 4, .fill = '0' });
+ }
+}
+
+fn outputSpecialEscape(c: u8, writer: *Writer) Error!void {
+ switch (c) {
+ '\\' => try writer.writeAll("\\\\"),
+ '\"' => try writer.writeAll("\\\""),
+ 0x08 => try writer.writeAll("\\b"),
+ 0x0C => try writer.writeAll("\\f"),
+ '\n' => try writer.writeAll("\\n"),
+ '\r' => try writer.writeAll("\\r"),
+ '\t' => try writer.writeAll("\\t"),
+ else => try outputUnicodeEscape(c, writer),
+ }
+}
+
+/// Write `string` to `writer` as a JSON encoded string.
+pub fn encodeJsonString(string: []const u8, options: Options, writer: *Writer) Error!void {
+ try writer.writeByte('\"');
+ try encodeJsonStringChars(string, options, writer);
+ try writer.writeByte('\"');
+}
+
+/// Write `chars` to `writer` as JSON encoded string characters.
+pub fn encodeJsonStringChars(chars: []const u8, options: Options, writer: *Writer) Error!void {
+ var write_cursor: usize = 0;
+ var i: usize = 0;
+ if (options.escape_unicode) {
+ while (i < chars.len) : (i += 1) {
+ switch (chars[i]) {
+ // normal ascii character
+ 0x20...0x21, 0x23...0x5B, 0x5D...0x7E => {},
+ 0x00...0x1F, '\\', '\"' => {
+ // Always must escape these.
+ try writer.writeAll(chars[write_cursor..i]);
+ try outputSpecialEscape(chars[i], writer);
+ write_cursor = i + 1;
+ },
+ 0x7F...0xFF => {
+ try writer.writeAll(chars[write_cursor..i]);
+ const ulen = std.unicode.utf8ByteSequenceLength(chars[i]) catch unreachable;
+ const codepoint = std.unicode.utf8Decode(chars[i..][0..ulen]) catch unreachable;
+ try outputUnicodeEscape(codepoint, writer);
+ i += ulen - 1;
+ write_cursor = i + 1;
+ },
+ }
+ }
+ } else {
+ while (i < chars.len) : (i += 1) {
+ switch (chars[i]) {
+ // normal bytes
+ 0x20...0x21, 0x23...0x5B, 0x5D...0xFF => {},
+ 0x00...0x1F, '\\', '\"' => {
+ // Always must escape these.
+ try writer.writeAll(chars[write_cursor..i]);
+ try outputSpecialEscape(chars[i], writer);
+ write_cursor = i + 1;
+ },
+ }
+ }
+ }
+ try writer.writeAll(chars[write_cursor..chars.len]);
+}
+
+test "json write stream" {
+ var out_buf: [1024]u8 = undefined;
+ var out: Writer = .fixed(&out_buf);
+ var w: Stringify = .{ .writer = &out, .options = .{ .whitespace = .indent_2 } };
+ try testBasicWriteStream(&w);
+}
+
+fn testBasicWriteStream(w: *Stringify) !void {
+ w.writer.end = 0;
+
+ try w.beginObject();
+
+ try w.objectField("object");
+ var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer arena_allocator.deinit();
+ try w.write(try getJsonObject(arena_allocator.allocator()));
+
+ try w.objectFieldRaw("\"string\"");
+ try w.write("This is a string");
+
+ try w.objectField("array");
+ try w.beginArray();
+ try w.write("Another string");
+ try w.write(@as(i32, 1));
+ try w.write(@as(f32, 3.5));
+ try w.endArray();
+
+ try w.objectField("int");
+ try w.write(@as(i32, 10));
+
+ try w.objectField("float");
+ try w.write(@as(f32, 3.5));
+
+ try w.endObject();
+
+ const expected =
+ \\{
+ \\ "object": {
+ \\ "one": 1,
+ \\ "two": 2
+ \\ },
+ \\ "string": "This is a string",
+ \\ "array": [
+ \\ "Another string",
+ \\ 1,
+ \\ 3.5
+ \\ ],
+ \\ "int": 10,
+ \\ "float": 3.5
+ \\}
+ ;
+ try std.testing.expectEqualStrings(expected, w.writer.buffered());
+}
+
+fn getJsonObject(allocator: std.mem.Allocator) !std.json.Value {
+ var v: std.json.Value = .{ .object = std.json.ObjectMap.init(allocator) };
+ try v.object.put("one", std.json.Value{ .integer = @as(i64, @intCast(1)) });
+ try v.object.put("two", std.json.Value{ .float = 2.0 });
+ return v;
+}
+
+test "stringify null optional fields" {
+ const MyStruct = struct {
+ optional: ?[]const u8 = null,
+ required: []const u8 = "something",
+ another_optional: ?[]const u8 = null,
+ another_required: []const u8 = "something else",
+ };
+ try testStringify(
+ \\{"optional":null,"required":"something","another_optional":null,"another_required":"something else"}
+ ,
+ MyStruct{},
+ .{},
+ );
+ try testStringify(
+ \\{"required":"something","another_required":"something else"}
+ ,
+ MyStruct{},
+ .{ .emit_null_optional_fields = false },
+ );
+}
+
+test "stringify basic types" {
+ try testStringify("false", false, .{});
+ try testStringify("true", true, .{});
+ try testStringify("null", @as(?u8, null), .{});
+ try testStringify("null", @as(?*u32, null), .{});
+ try testStringify("42", 42, .{});
+ try testStringify("42", 42.0, .{});
+ try testStringify("42", @as(u8, 42), .{});
+ try testStringify("42", @as(u128, 42), .{});
+ try testStringify("9999999999999999", 9999999999999999, .{});
+ try testStringify("42", @as(f32, 42), .{});
+ try testStringify("42", @as(f64, 42), .{});
+ try testStringify("\"ItBroke\"", @as(anyerror, error.ItBroke), .{});
+ try testStringify("\"ItBroke\"", error.ItBroke, .{});
+}
+
+test "stringify string" {
+ try testStringify("\"hello\"", "hello", .{});
+ try testStringify("\"with\\nescapes\\r\"", "with\nescapes\r", .{});
+ try testStringify("\"with\\nescapes\\r\"", "with\nescapes\r", .{ .escape_unicode = true });
+ try testStringify("\"with unicode\\u0001\"", "with unicode\u{1}", .{});
+ try testStringify("\"with unicode\\u0001\"", "with unicode\u{1}", .{ .escape_unicode = true });
+ try testStringify("\"with unicode\u{80}\"", "with unicode\u{80}", .{});
+ try testStringify("\"with unicode\\u0080\"", "with unicode\u{80}", .{ .escape_unicode = true });
+ try testStringify("\"with unicode\u{FF}\"", "with unicode\u{FF}", .{});
+ try testStringify("\"with unicode\\u00ff\"", "with unicode\u{FF}", .{ .escape_unicode = true });
+ try testStringify("\"with unicode\u{100}\"", "with unicode\u{100}", .{});
+ try testStringify("\"with unicode\\u0100\"", "with unicode\u{100}", .{ .escape_unicode = true });
+ try testStringify("\"with unicode\u{800}\"", "with unicode\u{800}", .{});
+ try testStringify("\"with unicode\\u0800\"", "with unicode\u{800}", .{ .escape_unicode = true });
+ try testStringify("\"with unicode\u{8000}\"", "with unicode\u{8000}", .{});
+ try testStringify("\"with unicode\\u8000\"", "with unicode\u{8000}", .{ .escape_unicode = true });
+ try testStringify("\"with unicode\u{D799}\"", "with unicode\u{D799}", .{});
+ try testStringify("\"with unicode\\ud799\"", "with unicode\u{D799}", .{ .escape_unicode = true });
+ try testStringify("\"with unicode\u{10000}\"", "with unicode\u{10000}", .{});
+ try testStringify("\"with unicode\\ud800\\udc00\"", "with unicode\u{10000}", .{ .escape_unicode = true });
+ try testStringify("\"with unicode\u{10FFFF}\"", "with unicode\u{10FFFF}", .{});
+ try testStringify("\"with unicode\\udbff\\udfff\"", "with unicode\u{10FFFF}", .{ .escape_unicode = true });
+}
+
+test "stringify many-item sentinel-terminated string" {
+ try testStringify("\"hello\"", @as([*:0]const u8, "hello"), .{});
+ try testStringify("\"with\\nescapes\\r\"", @as([*:0]const u8, "with\nescapes\r"), .{ .escape_unicode = true });
+ try testStringify("\"with unicode\\u0001\"", @as([*:0]const u8, "with unicode\u{1}"), .{ .escape_unicode = true });
+}
+
+test "stringify enums" {
+ const E = enum {
+ foo,
+ bar,
+ };
+ try testStringify("\"foo\"", E.foo, .{});
+ try testStringify("\"bar\"", E.bar, .{});
+}
+
+test "stringify non-exhaustive enum" {
+ const E = enum(u8) {
+ foo = 0,
+ _,
+ };
+ try testStringify("\"foo\"", E.foo, .{});
+ try testStringify("1", @as(E, @enumFromInt(1)), .{});
+}
+
+test "stringify enum literals" {
+ try testStringify("\"foo\"", .foo, .{});
+ try testStringify("\"bar\"", .bar, .{});
+}
+
+test "stringify tagged unions" {
+ const T = union(enum) {
+ nothing,
+ foo: u32,
+ bar: bool,
+ };
+ try testStringify("{\"nothing\":{}}", T{ .nothing = {} }, .{});
+ try testStringify("{\"foo\":42}", T{ .foo = 42 }, .{});
+ try testStringify("{\"bar\":true}", T{ .bar = true }, .{});
+}
+
+test "stringify struct" {
+ try testStringify("{\"foo\":42}", struct {
+ foo: u32,
+ }{ .foo = 42 }, .{});
+}
+
+test "emit_strings_as_arrays" {
+ // Should only affect string values, not object keys.
+ try testStringify("{\"foo\":\"bar\"}", .{ .foo = "bar" }, .{});
+ try testStringify("{\"foo\":[98,97,114]}", .{ .foo = "bar" }, .{ .emit_strings_as_arrays = true });
+ // Should *not* affect these types:
+ try testStringify("\"foo\"", @as(enum { foo, bar }, .foo), .{ .emit_strings_as_arrays = true });
+ try testStringify("\"ItBroke\"", error.ItBroke, .{ .emit_strings_as_arrays = true });
+ // Should work on these:
+ try testStringify("\"bar\"", @Vector(3, u8){ 'b', 'a', 'r' }, .{});
+ try testStringify("[98,97,114]", @Vector(3, u8){ 'b', 'a', 'r' }, .{ .emit_strings_as_arrays = true });
+ try testStringify("\"bar\"", [3]u8{ 'b', 'a', 'r' }, .{});
+ try testStringify("[98,97,114]", [3]u8{ 'b', 'a', 'r' }, .{ .emit_strings_as_arrays = true });
+}
+
+test "stringify struct with indentation" {
+ try testStringify(
+ \\{
+ \\ "foo": 42,
+ \\ "bar": [
+ \\ 1,
+ \\ 2,
+ \\ 3
+ \\ ]
+ \\}
+ ,
+ struct {
+ foo: u32,
+ bar: [3]u32,
+ }{
+ .foo = 42,
+ .bar = .{ 1, 2, 3 },
+ },
+ .{ .whitespace = .indent_4 },
+ );
+ try testStringify(
+ "{\n\t\"foo\": 42,\n\t\"bar\": [\n\t\t1,\n\t\t2,\n\t\t3\n\t]\n}",
+ struct {
+ foo: u32,
+ bar: [3]u32,
+ }{
+ .foo = 42,
+ .bar = .{ 1, 2, 3 },
+ },
+ .{ .whitespace = .indent_tab },
+ );
+ try testStringify(
+ \\{"foo":42,"bar":[1,2,3]}
+ ,
+ struct {
+ foo: u32,
+ bar: [3]u32,
+ }{
+ .foo = 42,
+ .bar = .{ 1, 2, 3 },
+ },
+ .{ .whitespace = .minified },
+ );
+}
+
+test "stringify array of structs" {
+ const MyStruct = struct {
+ foo: u32,
+ };
+ try testStringify("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", [_]MyStruct{
+ MyStruct{ .foo = 42 },
+ MyStruct{ .foo = 100 },
+ MyStruct{ .foo = 1000 },
+ }, .{});
+}
+
+test "stringify struct with custom stringifier" {
+ try testStringify("[\"something special\",42]", struct {
+ foo: u32,
+ const Self = @This();
+ pub fn jsonStringify(v: @This(), jws: anytype) !void {
+ _ = v;
+ try jws.beginArray();
+ try jws.write("something special");
+ try jws.write(42);
+ try jws.endArray();
+ }
+ }{ .foo = 42 }, .{});
+}
+
+fn testStringify(expected: []const u8, v: anytype, options: Options) !void {
+ var buffer: [4096]u8 = undefined;
+ var w: Writer = .fixed(&buffer);
+ try value(v, options, &w);
+ try std.testing.expectEqualStrings(expected, w.buffered());
+}
+
+test "raw streaming" {
+ var out_buf: [1024]u8 = undefined;
+ var out: Writer = .fixed(&out_buf);
+
+ var w: Stringify = .{ .writer = &out, .options = .{ .whitespace = .indent_2 } };
+ try w.beginObject();
+ try w.beginObjectFieldRaw();
+ try w.writer.writeAll("\"long");
+ try w.writer.writeAll(" key\"");
+ w.endObjectFieldRaw();
+ try w.beginWriteRaw();
+ try w.writer.writeAll("\"long");
+ try w.writer.writeAll(" value\"");
+ w.endWriteRaw();
+ try w.endObject();
+
+ const expected =
+ \\{
+ \\ "long key": "long value"
+ \\}
+ ;
+ try std.testing.expectEqualStrings(expected, w.writer.buffered());
+}
lib/std/json/stringify.zig
@@ -1,772 +0,0 @@
-const std = @import("std");
-const assert = std.debug.assert;
-const Allocator = std.mem.Allocator;
-const ArrayList = std.ArrayList;
-const BitStack = std.BitStack;
-
-const OBJECT_MODE = 0;
-const ARRAY_MODE = 1;
-
-pub const StringifyOptions = struct {
- /// Controls the whitespace emitted.
- /// The default `.minified` is a compact encoding with no whitespace between tokens.
- /// Any setting other than `.minified` will use newlines, indentation, and a space after each ':'.
- /// `.indent_1` means 1 space for each indentation level, `.indent_2` means 2 spaces, etc.
- /// `.indent_tab` uses a tab for each indentation level.
- whitespace: enum {
- minified,
- indent_1,
- indent_2,
- indent_3,
- indent_4,
- indent_8,
- indent_tab,
- } = .minified,
-
- /// Should optional fields with null value be written?
- emit_null_optional_fields: bool = true,
-
- /// Arrays/slices of u8 are typically encoded as JSON strings.
- /// This option emits them as arrays of numbers instead.
- /// Does not affect calls to `objectField*()`.
- emit_strings_as_arrays: bool = false,
-
- /// Should unicode characters be escaped in strings?
- escape_unicode: bool = false,
-
- /// When true, renders numbers outside the range `+-1<<53` (the precise integer range of f64) as JSON strings in base 10.
- emit_nonportable_numbers_as_strings: bool = false,
-};
-
-/// Writes the given value to the `std.io.GenericWriter` stream.
-/// See `WriteStream` for how the given value is serialized into JSON.
-/// The maximum nesting depth of the output JSON document is 256.
-/// See also `stringifyMaxDepth` and `stringifyArbitraryDepth`.
-pub fn stringify(
- value: anytype,
- options: StringifyOptions,
- out_stream: anytype,
-) @TypeOf(out_stream).Error!void {
- var jw = writeStream(out_stream, options);
- defer jw.deinit();
- try jw.write(value);
-}
-
-/// Like `stringify` with configurable nesting depth.
-/// `max_depth` is rounded up to the nearest multiple of 8.
-/// Give `null` for `max_depth` to disable some safety checks and allow arbitrary nesting depth.
-/// See `writeStreamMaxDepth` for more info.
-pub fn stringifyMaxDepth(
- value: anytype,
- options: StringifyOptions,
- out_stream: anytype,
- comptime max_depth: ?usize,
-) @TypeOf(out_stream).Error!void {
- var jw = writeStreamMaxDepth(out_stream, options, max_depth);
- try jw.write(value);
-}
-
-/// Like `stringify` but takes an allocator to facilitate safety checks while allowing arbitrary nesting depth.
-/// These safety checks can be helpful when debugging custom `jsonStringify` implementations;
-/// See `WriteStream`.
-pub fn stringifyArbitraryDepth(
- allocator: Allocator,
- value: anytype,
- options: StringifyOptions,
- out_stream: anytype,
-) WriteStream(@TypeOf(out_stream), .checked_to_arbitrary_depth).Error!void {
- var jw = writeStreamArbitraryDepth(allocator, out_stream, options);
- defer jw.deinit();
- try jw.write(value);
-}
-
-/// Calls `stringifyArbitraryDepth` and stores the result in dynamically allocated memory
-/// instead of taking a `std.io.GenericWriter`.
-///
-/// Caller owns returned memory.
-pub fn stringifyAlloc(
- allocator: Allocator,
- value: anytype,
- options: StringifyOptions,
-) error{OutOfMemory}![]u8 {
- var list = std.ArrayList(u8).init(allocator);
- errdefer list.deinit();
- try stringifyArbitraryDepth(allocator, value, options, list.writer());
- return list.toOwnedSlice();
-}
-
-/// See `WriteStream` for documentation.
-/// Equivalent to calling `writeStreamMaxDepth` with a depth of `256`.
-///
-/// The caller does *not* need to call `deinit()` on the returned object.
-pub fn writeStream(
- out_stream: anytype,
- options: StringifyOptions,
-) WriteStream(@TypeOf(out_stream), .{ .checked_to_fixed_depth = 256 }) {
- return writeStreamMaxDepth(out_stream, options, 256);
-}
-
-/// See `WriteStream` for documentation.
-/// The returned object includes 1 bit of size per `max_depth` to enable safety checks on the order of method calls;
-/// see the grammar in the `WriteStream` documentation.
-/// `max_depth` is rounded up to the nearest multiple of 8.
-/// If the nesting depth exceeds `max_depth`, it is detectable illegal behavior.
-/// Give `null` for `max_depth` to disable safety checks for the grammar and allow arbitrary nesting depth.
-/// In `ReleaseFast` and `ReleaseSmall`, `max_depth` is ignored, effectively equivalent to passing `null`.
-/// Alternatively, see `writeStreamArbitraryDepth` to do safety checks to arbitrary depth.
-///
-/// The caller does *not* need to call `deinit()` on the returned object.
-pub fn writeStreamMaxDepth(
- out_stream: anytype,
- options: StringifyOptions,
- comptime max_depth: ?usize,
-) WriteStream(
- @TypeOf(out_stream),
- if (max_depth) |d| .{ .checked_to_fixed_depth = d } else .assumed_correct,
-) {
- return WriteStream(
- @TypeOf(out_stream),
- if (max_depth) |d| .{ .checked_to_fixed_depth = d } else .assumed_correct,
- ).init(undefined, out_stream, options);
-}
-
-/// See `WriteStream` for documentation.
-/// This version of the write stream enables safety checks to arbitrarily deep nesting levels
-/// by using the given allocator.
-/// The caller should call `deinit()` on the returned object to free allocated memory.
-///
-/// In `ReleaseFast` and `ReleaseSmall` mode, this function is effectively equivalent to calling `writeStreamMaxDepth(..., null)`;
-/// in those build modes, the allocator is *not used*.
-pub fn writeStreamArbitraryDepth(
- allocator: Allocator,
- out_stream: anytype,
- options: StringifyOptions,
-) WriteStream(@TypeOf(out_stream), .checked_to_arbitrary_depth) {
- return WriteStream(@TypeOf(out_stream), .checked_to_arbitrary_depth).init(allocator, out_stream, options);
-}
-
-/// Writes JSON ([RFC8259](https://tools.ietf.org/html/rfc8259)) formatted data
-/// to a stream.
-///
-/// The sequence of method calls to write JSON content must follow this grammar:
-/// ```
-/// <once> = <value>
-/// <value> =
-/// | <object>
-/// | <array>
-/// | write
-/// | print
-/// | <writeRawStream>
-/// <object> = beginObject ( <field> <value> )* endObject
-/// <field> = objectField | objectFieldRaw | <objectFieldRawStream>
-/// <array> = beginArray ( <value> )* endArray
-/// <writeRawStream> = beginWriteRaw ( stream.writeAll )* endWriteRaw
-/// <objectFieldRawStream> = beginObjectFieldRaw ( stream.writeAll )* endObjectFieldRaw
-/// ```
-///
-/// The `safety_checks_hint` parameter determines how much memory is used to enable assertions that the above grammar is being followed,
-/// e.g. tripping an assertion rather than allowing `endObject` to emit the final `}` in `[[[]]}`.
-/// "Depth" in this context means the depth of nested `[]` or `{}` expressions
-/// (or equivalently the amount of recursion on the `<value>` grammar expression above).
-/// For example, emitting the JSON `[[[]]]` requires a depth of 3.
-/// If `.checked_to_fixed_depth` is used, there is additionally an assertion that the nesting depth never exceeds the given limit.
-/// `.checked_to_arbitrary_depth` requires a runtime allocator for the memory.
-/// `.checked_to_fixed_depth` embeds the storage required in the `WriteStream` struct.
-/// `.assumed_correct` requires no space and performs none of these assertions.
-/// In `ReleaseFast` and `ReleaseSmall` mode, the given `safety_checks_hint` is ignored and is always treated as `.assumed_correct`.
-pub fn WriteStream(
- comptime OutStream: type,
- comptime safety_checks_hint: union(enum) {
- checked_to_arbitrary_depth,
- checked_to_fixed_depth: usize, // Rounded up to the nearest multiple of 8.
- assumed_correct,
- },
-) type {
- return struct {
- const Self = @This();
- const build_mode_has_safety = switch (@import("builtin").mode) {
- .Debug, .ReleaseSafe => true,
- .ReleaseFast, .ReleaseSmall => false,
- };
- const safety_checks: @TypeOf(safety_checks_hint) = if (build_mode_has_safety)
- safety_checks_hint
- else
- .assumed_correct;
-
- pub const Stream = OutStream;
- pub const Error = switch (safety_checks) {
- .checked_to_arbitrary_depth => Stream.Error || error{OutOfMemory},
- .checked_to_fixed_depth, .assumed_correct => Stream.Error,
- };
-
- options: StringifyOptions,
-
- stream: OutStream,
- indent_level: usize = 0,
- next_punctuation: enum {
- the_beginning,
- none,
- comma,
- colon,
- } = .the_beginning,
-
- nesting_stack: switch (safety_checks) {
- .checked_to_arbitrary_depth => BitStack,
- .checked_to_fixed_depth => |fixed_buffer_size| [(fixed_buffer_size + 7) >> 3]u8,
- .assumed_correct => void,
- },
-
- raw_streaming_mode: if (build_mode_has_safety)
- enum { none, value, objectField }
- else
- void = if (build_mode_has_safety) .none else {},
-
- pub fn init(safety_allocator: Allocator, stream: OutStream, options: StringifyOptions) Self {
- return .{
- .options = options,
- .stream = stream,
- .nesting_stack = switch (safety_checks) {
- .checked_to_arbitrary_depth => BitStack.init(safety_allocator),
- .checked_to_fixed_depth => |fixed_buffer_size| [_]u8{0} ** ((fixed_buffer_size + 7) >> 3),
- .assumed_correct => {},
- },
- };
- }
-
- /// Only necessary with .checked_to_arbitrary_depth.
- pub fn deinit(self: *Self) void {
- switch (safety_checks) {
- .checked_to_arbitrary_depth => self.nesting_stack.deinit(),
- .checked_to_fixed_depth, .assumed_correct => {},
- }
- self.* = undefined;
- }
-
- pub fn beginArray(self: *Self) Error!void {
- if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
- try self.valueStart();
- try self.stream.writeByte('[');
- try self.pushIndentation(ARRAY_MODE);
- self.next_punctuation = .none;
- }
-
- pub fn beginObject(self: *Self) Error!void {
- if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
- try self.valueStart();
- try self.stream.writeByte('{');
- try self.pushIndentation(OBJECT_MODE);
- self.next_punctuation = .none;
- }
-
- pub fn endArray(self: *Self) Error!void {
- if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
- self.popIndentation(ARRAY_MODE);
- switch (self.next_punctuation) {
- .none => {},
- .comma => {
- try self.indent();
- },
- .the_beginning, .colon => unreachable,
- }
- try self.stream.writeByte(']');
- self.valueDone();
- }
-
- pub fn endObject(self: *Self) Error!void {
- if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
- self.popIndentation(OBJECT_MODE);
- switch (self.next_punctuation) {
- .none => {},
- .comma => {
- try self.indent();
- },
- .the_beginning, .colon => unreachable,
- }
- try self.stream.writeByte('}');
- self.valueDone();
- }
-
- fn pushIndentation(self: *Self, mode: u1) !void {
- switch (safety_checks) {
- .checked_to_arbitrary_depth => {
- try self.nesting_stack.push(mode);
- self.indent_level += 1;
- },
- .checked_to_fixed_depth => {
- BitStack.pushWithStateAssumeCapacity(&self.nesting_stack, &self.indent_level, mode);
- },
- .assumed_correct => {
- self.indent_level += 1;
- },
- }
- }
- fn popIndentation(self: *Self, assert_its_this_one: u1) void {
- switch (safety_checks) {
- .checked_to_arbitrary_depth => {
- assert(self.nesting_stack.pop() == assert_its_this_one);
- self.indent_level -= 1;
- },
- .checked_to_fixed_depth => {
- assert(BitStack.popWithState(&self.nesting_stack, &self.indent_level) == assert_its_this_one);
- },
- .assumed_correct => {
- self.indent_level -= 1;
- },
- }
- }
-
- fn indent(self: *Self) !void {
- var char: u8 = ' ';
- const n_chars = switch (self.options.whitespace) {
- .minified => return,
- .indent_1 => 1 * self.indent_level,
- .indent_2 => 2 * self.indent_level,
- .indent_3 => 3 * self.indent_level,
- .indent_4 => 4 * self.indent_level,
- .indent_8 => 8 * self.indent_level,
- .indent_tab => blk: {
- char = '\t';
- break :blk self.indent_level;
- },
- };
- try self.stream.writeByte('\n');
- try self.stream.writeByteNTimes(char, n_chars);
- }
-
- fn valueStart(self: *Self) !void {
- if (self.isObjectKeyExpected()) |is_it| assert(!is_it); // Call objectField*(), not write(), for object keys.
- return self.valueStartAssumeTypeOk();
- }
- fn objectFieldStart(self: *Self) !void {
- if (self.isObjectKeyExpected()) |is_it| assert(is_it); // Expected write(), not objectField*().
- return self.valueStartAssumeTypeOk();
- }
- fn valueStartAssumeTypeOk(self: *Self) !void {
- assert(!self.isComplete()); // JSON document already complete.
- switch (self.next_punctuation) {
- .the_beginning => {
- // No indentation for the very beginning.
- },
- .none => {
- // First item in a container.
- try self.indent();
- },
- .comma => {
- // Subsequent item in a container.
- try self.stream.writeByte(',');
- try self.indent();
- },
- .colon => {
- try self.stream.writeByte(':');
- if (self.options.whitespace != .minified) {
- try self.stream.writeByte(' ');
- }
- },
- }
- }
- fn valueDone(self: *Self) void {
- self.next_punctuation = .comma;
- }
-
- // Only when safety is enabled:
- fn isObjectKeyExpected(self: *const Self) ?bool {
- switch (safety_checks) {
- .checked_to_arbitrary_depth => return self.indent_level > 0 and
- self.nesting_stack.peek() == OBJECT_MODE and
- self.next_punctuation != .colon,
- .checked_to_fixed_depth => return self.indent_level > 0 and
- BitStack.peekWithState(&self.nesting_stack, self.indent_level) == OBJECT_MODE and
- self.next_punctuation != .colon,
- .assumed_correct => return null,
- }
- }
- fn isComplete(self: *const Self) bool {
- return self.indent_level == 0 and self.next_punctuation == .comma;
- }
-
- /// An alternative to calling `write` that formats a value with `std.fmt`.
- /// This function does the usual punctuation and indentation formatting
- /// assuming the resulting formatted string represents a single complete value;
- /// e.g. `"1"`, `"[]"`, `"[1,2]"`, not `"1,2"`.
- /// This function may be useful for doing your own number formatting.
- pub fn print(self: *Self, comptime fmt: []const u8, args: anytype) Error!void {
- if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
- try self.valueStart();
- try self.stream.print(fmt, args);
- self.valueDone();
- }
-
- /// An alternative to calling `write` that allows you to write directly to the `.stream` field, e.g. with `.stream.writeAll()`.
- /// Call `beginWriteRaw()`, then write a complete value (including any quotes if necessary) directly to the `.stream` field,
- /// then call `endWriteRaw()`.
- /// This can be useful for streaming very long strings into the output without needing it all buffered in memory.
- pub fn beginWriteRaw(self: *Self) !void {
- if (build_mode_has_safety) {
- assert(self.raw_streaming_mode == .none);
- self.raw_streaming_mode = .value;
- }
- try self.valueStart();
- }
-
- /// See `beginWriteRaw`.
- pub fn endWriteRaw(self: *Self) void {
- if (build_mode_has_safety) {
- assert(self.raw_streaming_mode == .value);
- self.raw_streaming_mode = .none;
- }
- self.valueDone();
- }
-
- /// See `WriteStream` for when to call this method.
- /// `key` is the string content of the property name.
- /// Surrounding quotes will be added and any special characters will be escaped.
- /// See also `objectFieldRaw`.
- pub fn objectField(self: *Self, key: []const u8) Error!void {
- if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
- try self.objectFieldStart();
- try encodeJsonString(key, self.options, self.stream);
- self.next_punctuation = .colon;
- }
- /// See `WriteStream` for when to call this method.
- /// `quoted_key` is the complete bytes of the key including quotes and any necessary escape sequences.
- /// A few assertions are performed on the given value to ensure that the caller of this function understands the API contract.
- /// See also `objectField`.
- pub fn objectFieldRaw(self: *Self, quoted_key: []const u8) Error!void {
- if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
- assert(quoted_key.len >= 2 and quoted_key[0] == '"' and quoted_key[quoted_key.len - 1] == '"'); // quoted_key should be "quoted".
- try self.objectFieldStart();
- try self.stream.writeAll(quoted_key);
- self.next_punctuation = .colon;
- }
-
- /// In the rare case that you need to write very long object field names,
- /// this is an alternative to `objectField` and `objectFieldRaw` that allows you to write directly to the `.stream` field
- /// similar to `beginWriteRaw`.
- /// Call `endObjectFieldRaw()` when you're done.
- pub fn beginObjectFieldRaw(self: *Self) !void {
- if (build_mode_has_safety) {
- assert(self.raw_streaming_mode == .none);
- self.raw_streaming_mode = .objectField;
- }
- try self.objectFieldStart();
- }
-
- /// See `beginObjectFieldRaw`.
- pub fn endObjectFieldRaw(self: *Self) void {
- if (build_mode_has_safety) {
- assert(self.raw_streaming_mode == .objectField);
- self.raw_streaming_mode = .none;
- }
- self.next_punctuation = .colon;
- }
-
- /// Renders the given Zig value as JSON.
- ///
- /// Supported types:
- /// * Zig `bool` -> JSON `true` or `false`.
- /// * Zig `?T` -> `null` or the rendering of `T`.
- /// * Zig `i32`, `u64`, etc. -> JSON number or string.
- /// * When option `emit_nonportable_numbers_as_strings` is true, if the value is outside the range `+-1<<53` (the precise integer range of f64), it is rendered as a JSON string in base 10. Otherwise, it is rendered as JSON number.
- /// * Zig floats -> JSON number or string.
- /// * If the value cannot be precisely represented by an f64, it is rendered as a JSON string. Otherwise, it is rendered as JSON number.
- /// * Zig `[]const u8`, `[]u8`, `*[N]u8`, `@Vector(N, u8)`, and similar -> JSON string.
- /// * See `StringifyOptions.emit_strings_as_arrays`.
- /// * If the content is not valid UTF-8, rendered as an array of numbers instead.
- /// * Zig `[]T`, `[N]T`, `*[N]T`, `@Vector(N, T)`, and similar -> JSON array of the rendering of each item.
- /// * Zig tuple -> JSON array of the rendering of each item.
- /// * Zig `struct` -> JSON object with each field in declaration order.
- /// * If the struct declares a method `pub fn jsonStringify(self: *@This(), jw: anytype) !void`, it is called to do the serialization instead of the default behavior. The given `jw` is a pointer to this `WriteStream`. See `std.json.Value` for an example.
- /// * See `StringifyOptions.emit_null_optional_fields`.
- /// * Zig `union(enum)` -> JSON object with one field named for the active tag and a value representing the payload.
- /// * If the payload is `void`, then the emitted value is `{}`.
- /// * If the union declares a method `pub fn jsonStringify(self: *@This(), jw: anytype) !void`, it is called to do the serialization instead of the default behavior. The given `jw` is a pointer to this `WriteStream`.
- /// * Zig `enum` -> JSON string naming the active tag.
- /// * If the enum declares a method `pub fn jsonStringify(self: *@This(), jw: anytype) !void`, it is called to do the serialization instead of the default behavior. The given `jw` is a pointer to this `WriteStream`.
- /// * If the enum is non-exhaustive, unnamed values are rendered as integers.
- /// * Zig untyped enum literal -> JSON string naming the active tag.
- /// * Zig error -> JSON string naming the error.
- /// * Zig `*T` -> the rendering of `T`. Note there is no guard against circular-reference infinite recursion.
- ///
- /// See also alternative functions `print` and `beginWriteRaw`.
- /// For writing object field names, use `objectField` instead.
- pub fn write(self: *Self, value: anytype) Error!void {
- if (build_mode_has_safety) assert(self.raw_streaming_mode == .none);
- const T = @TypeOf(value);
- switch (@typeInfo(T)) {
- .int => {
- try self.valueStart();
- if (self.options.emit_nonportable_numbers_as_strings and
- (value <= -(1 << 53) or value >= (1 << 53)))
- {
- try self.stream.print("\"{}\"", .{value});
- } else {
- try self.stream.print("{}", .{value});
- }
- self.valueDone();
- return;
- },
- .comptime_int => {
- return self.write(@as(std.math.IntFittingRange(value, value), value));
- },
- .float, .comptime_float => {
- if (@as(f64, @floatCast(value)) == value) {
- try self.valueStart();
- try self.stream.print("{}", .{@as(f64, @floatCast(value))});
- self.valueDone();
- return;
- }
- try self.valueStart();
- try self.stream.print("\"{}\"", .{value});
- self.valueDone();
- return;
- },
-
- .bool => {
- try self.valueStart();
- try self.stream.writeAll(if (value) "true" else "false");
- self.valueDone();
- return;
- },
- .null => {
- try self.valueStart();
- try self.stream.writeAll("null");
- self.valueDone();
- return;
- },
- .optional => {
- if (value) |payload| {
- return try self.write(payload);
- } else {
- return try self.write(null);
- }
- },
- .@"enum" => |enum_info| {
- if (std.meta.hasFn(T, "jsonStringify")) {
- return value.jsonStringify(self);
- }
-
- if (!enum_info.is_exhaustive) {
- inline for (enum_info.fields) |field| {
- if (value == @field(T, field.name)) {
- break;
- }
- } else {
- return self.write(@intFromEnum(value));
- }
- }
-
- return self.stringValue(@tagName(value));
- },
- .enum_literal => {
- return self.stringValue(@tagName(value));
- },
- .@"union" => {
- if (std.meta.hasFn(T, "jsonStringify")) {
- return value.jsonStringify(self);
- }
-
- const info = @typeInfo(T).@"union";
- if (info.tag_type) |UnionTagType| {
- try self.beginObject();
- inline for (info.fields) |u_field| {
- if (value == @field(UnionTagType, u_field.name)) {
- try self.objectField(u_field.name);
- if (u_field.type == void) {
- // void value is {}
- try self.beginObject();
- try self.endObject();
- } else {
- try self.write(@field(value, u_field.name));
- }
- break;
- }
- } else {
- unreachable; // No active tag?
- }
- try self.endObject();
- return;
- } else {
- @compileError("Unable to stringify untagged union '" ++ @typeName(T) ++ "'");
- }
- },
- .@"struct" => |S| {
- if (std.meta.hasFn(T, "jsonStringify")) {
- return value.jsonStringify(self);
- }
-
- if (S.is_tuple) {
- try self.beginArray();
- } else {
- try self.beginObject();
- }
- inline for (S.fields) |Field| {
- // don't include void fields
- if (Field.type == void) continue;
-
- var emit_field = true;
-
- // don't include optional fields that are null when emit_null_optional_fields is set to false
- if (@typeInfo(Field.type) == .optional) {
- if (self.options.emit_null_optional_fields == false) {
- if (@field(value, Field.name) == null) {
- emit_field = false;
- }
- }
- }
-
- if (emit_field) {
- if (!S.is_tuple) {
- try self.objectField(Field.name);
- }
- try self.write(@field(value, Field.name));
- }
- }
- if (S.is_tuple) {
- try self.endArray();
- } else {
- try self.endObject();
- }
- return;
- },
- .error_set => return self.stringValue(@errorName(value)),
- .pointer => |ptr_info| switch (ptr_info.size) {
- .one => switch (@typeInfo(ptr_info.child)) {
- .array => {
- // Coerce `*[N]T` to `[]const T`.
- const Slice = []const std.meta.Elem(ptr_info.child);
- return self.write(@as(Slice, value));
- },
- else => {
- return self.write(value.*);
- },
- },
- .many, .slice => {
- if (ptr_info.size == .many and ptr_info.sentinel() == null)
- @compileError("unable to stringify type '" ++ @typeName(T) ++ "' without sentinel");
- const slice = if (ptr_info.size == .many) std.mem.span(value) else value;
-
- if (ptr_info.child == u8) {
- // This is a []const u8, or some similar Zig string.
- if (!self.options.emit_strings_as_arrays and std.unicode.utf8ValidateSlice(slice)) {
- return self.stringValue(slice);
- }
- }
-
- try self.beginArray();
- for (slice) |x| {
- try self.write(x);
- }
- try self.endArray();
- return;
- },
- else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"),
- },
- .array => {
- // Coerce `[N]T` to `*const [N]T` (and then to `[]const T`).
- return self.write(&value);
- },
- .vector => |info| {
- const array: [info.len]info.child = value;
- return self.write(&array);
- },
- else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"),
- }
- unreachable;
- }
-
- fn stringValue(self: *Self, s: []const u8) !void {
- try self.valueStart();
- try encodeJsonString(s, self.options, self.stream);
- self.valueDone();
- }
- };
-}
-
-fn outputUnicodeEscape(codepoint: u21, out_stream: anytype) !void {
- if (codepoint <= 0xFFFF) {
- // If the character is in the Basic Multilingual Plane (U+0000 through U+FFFF),
- // then it may be represented as a six-character sequence: a reverse solidus, followed
- // by the lowercase letter u, followed by four hexadecimal digits that encode the character's code point.
- try out_stream.writeAll("\\u");
- //try w.printInt("x", .{ .width = 4, .fill = '0' }, codepoint);
- try std.fmt.format(out_stream, "{x:0>4}", .{codepoint});
- } else {
- assert(codepoint <= 0x10FFFF);
- // To escape an extended character that is not in the Basic Multilingual Plane,
- // the character is represented as a 12-character sequence, encoding the UTF-16 surrogate pair.
- const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800;
- const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00;
- try out_stream.writeAll("\\u");
- //try w.printInt("x", .{ .width = 4, .fill = '0' }, high);
- try std.fmt.format(out_stream, "{x:0>4}", .{high});
- try out_stream.writeAll("\\u");
- //try w.printInt("x", .{ .width = 4, .fill = '0' }, low);
- try std.fmt.format(out_stream, "{x:0>4}", .{low});
- }
-}
-
-fn outputSpecialEscape(c: u8, writer: anytype) !void {
- switch (c) {
- '\\' => try writer.writeAll("\\\\"),
- '\"' => try writer.writeAll("\\\""),
- 0x08 => try writer.writeAll("\\b"),
- 0x0C => try writer.writeAll("\\f"),
- '\n' => try writer.writeAll("\\n"),
- '\r' => try writer.writeAll("\\r"),
- '\t' => try writer.writeAll("\\t"),
- else => try outputUnicodeEscape(c, writer),
- }
-}
-
-/// Write `string` to `writer` as a JSON encoded string.
-pub fn encodeJsonString(string: []const u8, options: StringifyOptions, writer: anytype) !void {
- try writer.writeByte('\"');
- try encodeJsonStringChars(string, options, writer);
- try writer.writeByte('\"');
-}
-
-/// Write `chars` to `writer` as JSON encoded string characters.
-pub fn encodeJsonStringChars(chars: []const u8, options: StringifyOptions, writer: anytype) !void {
- var write_cursor: usize = 0;
- var i: usize = 0;
- if (options.escape_unicode) {
- while (i < chars.len) : (i += 1) {
- switch (chars[i]) {
- // normal ascii character
- 0x20...0x21, 0x23...0x5B, 0x5D...0x7E => {},
- 0x00...0x1F, '\\', '\"' => {
- // Always must escape these.
- try writer.writeAll(chars[write_cursor..i]);
- try outputSpecialEscape(chars[i], writer);
- write_cursor = i + 1;
- },
- 0x7F...0xFF => {
- try writer.writeAll(chars[write_cursor..i]);
- const ulen = std.unicode.utf8ByteSequenceLength(chars[i]) catch unreachable;
- const codepoint = std.unicode.utf8Decode(chars[i..][0..ulen]) catch unreachable;
- try outputUnicodeEscape(codepoint, writer);
- i += ulen - 1;
- write_cursor = i + 1;
- },
- }
- }
- } else {
- while (i < chars.len) : (i += 1) {
- switch (chars[i]) {
- // normal bytes
- 0x20...0x21, 0x23...0x5B, 0x5D...0xFF => {},
- 0x00...0x1F, '\\', '\"' => {
- // Always must escape these.
- try writer.writeAll(chars[write_cursor..i]);
- try outputSpecialEscape(chars[i], writer);
- write_cursor = i + 1;
- },
- }
- }
- }
- try writer.writeAll(chars[write_cursor..chars.len]);
-}
-
-test {
- _ = @import("./stringify_test.zig");
-}
lib/std/json/stringify_test.zig
@@ -1,504 +0,0 @@
-const std = @import("std");
-const mem = std.mem;
-const testing = std.testing;
-
-const ObjectMap = @import("dynamic.zig").ObjectMap;
-const Value = @import("dynamic.zig").Value;
-
-const StringifyOptions = @import("stringify.zig").StringifyOptions;
-const stringify = @import("stringify.zig").stringify;
-const stringifyMaxDepth = @import("stringify.zig").stringifyMaxDepth;
-const stringifyArbitraryDepth = @import("stringify.zig").stringifyArbitraryDepth;
-const stringifyAlloc = @import("stringify.zig").stringifyAlloc;
-const writeStream = @import("stringify.zig").writeStream;
-const writeStreamMaxDepth = @import("stringify.zig").writeStreamMaxDepth;
-const writeStreamArbitraryDepth = @import("stringify.zig").writeStreamArbitraryDepth;
-
-test "json write stream" {
- var out_buf: [1024]u8 = undefined;
- var slice_stream = std.io.fixedBufferStream(&out_buf);
- const out = slice_stream.writer();
-
- {
- var w = writeStream(out, .{ .whitespace = .indent_2 });
- try testBasicWriteStream(&w, &slice_stream);
- }
-
- {
- var w = writeStreamMaxDepth(out, .{ .whitespace = .indent_2 }, 8);
- try testBasicWriteStream(&w, &slice_stream);
- }
-
- {
- var w = writeStreamMaxDepth(out, .{ .whitespace = .indent_2 }, null);
- try testBasicWriteStream(&w, &slice_stream);
- }
-
- {
- var w = writeStreamArbitraryDepth(testing.allocator, out, .{ .whitespace = .indent_2 });
- defer w.deinit();
- try testBasicWriteStream(&w, &slice_stream);
- }
-}
-
-fn testBasicWriteStream(w: anytype, slice_stream: anytype) !void {
- slice_stream.reset();
-
- try w.beginObject();
-
- try w.objectField("object");
- var arena_allocator = std.heap.ArenaAllocator.init(testing.allocator);
- defer arena_allocator.deinit();
- try w.write(try getJsonObject(arena_allocator.allocator()));
-
- try w.objectFieldRaw("\"string\"");
- try w.write("This is a string");
-
- try w.objectField("array");
- try w.beginArray();
- try w.write("Another string");
- try w.write(@as(i32, 1));
- try w.write(@as(f32, 3.5));
- try w.endArray();
-
- try w.objectField("int");
- try w.write(@as(i32, 10));
-
- try w.objectField("float");
- try w.write(@as(f32, 3.5));
-
- try w.endObject();
-
- const result = slice_stream.getWritten();
- const expected =
- \\{
- \\ "object": {
- \\ "one": 1,
- \\ "two": 2
- \\ },
- \\ "string": "This is a string",
- \\ "array": [
- \\ "Another string",
- \\ 1,
- \\ 3.5
- \\ ],
- \\ "int": 10,
- \\ "float": 3.5
- \\}
- ;
- try std.testing.expectEqualStrings(expected, result);
-}
-
-fn getJsonObject(allocator: std.mem.Allocator) !Value {
- var value = Value{ .object = ObjectMap.init(allocator) };
- try value.object.put("one", Value{ .integer = @as(i64, @intCast(1)) });
- try value.object.put("two", Value{ .float = 2.0 });
- return value;
-}
-
-test "stringify null optional fields" {
- const MyStruct = struct {
- optional: ?[]const u8 = null,
- required: []const u8 = "something",
- another_optional: ?[]const u8 = null,
- another_required: []const u8 = "something else",
- };
- try testStringify(
- \\{"optional":null,"required":"something","another_optional":null,"another_required":"something else"}
- ,
- MyStruct{},
- .{},
- );
- try testStringify(
- \\{"required":"something","another_required":"something else"}
- ,
- MyStruct{},
- .{ .emit_null_optional_fields = false },
- );
-}
-
-test "stringify basic types" {
- try testStringify("false", false, .{});
- try testStringify("true", true, .{});
- try testStringify("null", @as(?u8, null), .{});
- try testStringify("null", @as(?*u32, null), .{});
- try testStringify("42", 42, .{});
- try testStringify("42", 42.0, .{});
- try testStringify("42", @as(u8, 42), .{});
- try testStringify("42", @as(u128, 42), .{});
- try testStringify("9999999999999999", 9999999999999999, .{});
- try testStringify("42", @as(f32, 42), .{});
- try testStringify("42", @as(f64, 42), .{});
- try testStringify("\"ItBroke\"", @as(anyerror, error.ItBroke), .{});
- try testStringify("\"ItBroke\"", error.ItBroke, .{});
-}
-
-test "stringify string" {
- try testStringify("\"hello\"", "hello", .{});
- try testStringify("\"with\\nescapes\\r\"", "with\nescapes\r", .{});
- try testStringify("\"with\\nescapes\\r\"", "with\nescapes\r", .{ .escape_unicode = true });
- try testStringify("\"with unicode\\u0001\"", "with unicode\u{1}", .{});
- try testStringify("\"with unicode\\u0001\"", "with unicode\u{1}", .{ .escape_unicode = true });
- try testStringify("\"with unicode\u{80}\"", "with unicode\u{80}", .{});
- try testStringify("\"with unicode\\u0080\"", "with unicode\u{80}", .{ .escape_unicode = true });
- try testStringify("\"with unicode\u{FF}\"", "with unicode\u{FF}", .{});
- try testStringify("\"with unicode\\u00ff\"", "with unicode\u{FF}", .{ .escape_unicode = true });
- try testStringify("\"with unicode\u{100}\"", "with unicode\u{100}", .{});
- try testStringify("\"with unicode\\u0100\"", "with unicode\u{100}", .{ .escape_unicode = true });
- try testStringify("\"with unicode\u{800}\"", "with unicode\u{800}", .{});
- try testStringify("\"with unicode\\u0800\"", "with unicode\u{800}", .{ .escape_unicode = true });
- try testStringify("\"with unicode\u{8000}\"", "with unicode\u{8000}", .{});
- try testStringify("\"with unicode\\u8000\"", "with unicode\u{8000}", .{ .escape_unicode = true });
- try testStringify("\"with unicode\u{D799}\"", "with unicode\u{D799}", .{});
- try testStringify("\"with unicode\\ud799\"", "with unicode\u{D799}", .{ .escape_unicode = true });
- try testStringify("\"with unicode\u{10000}\"", "with unicode\u{10000}", .{});
- try testStringify("\"with unicode\\ud800\\udc00\"", "with unicode\u{10000}", .{ .escape_unicode = true });
- try testStringify("\"with unicode\u{10FFFF}\"", "with unicode\u{10FFFF}", .{});
- try testStringify("\"with unicode\\udbff\\udfff\"", "with unicode\u{10FFFF}", .{ .escape_unicode = true });
-}
-
-test "stringify many-item sentinel-terminated string" {
- try testStringify("\"hello\"", @as([*:0]const u8, "hello"), .{});
- try testStringify("\"with\\nescapes\\r\"", @as([*:0]const u8, "with\nescapes\r"), .{ .escape_unicode = true });
- try testStringify("\"with unicode\\u0001\"", @as([*:0]const u8, "with unicode\u{1}"), .{ .escape_unicode = true });
-}
-
-test "stringify enums" {
- const E = enum {
- foo,
- bar,
- };
- try testStringify("\"foo\"", E.foo, .{});
- try testStringify("\"bar\"", E.bar, .{});
-}
-
-test "stringify non-exhaustive enum" {
- const E = enum(u8) {
- foo = 0,
- _,
- };
- try testStringify("\"foo\"", E.foo, .{});
- try testStringify("1", @as(E, @enumFromInt(1)), .{});
-}
-
-test "stringify enum literals" {
- try testStringify("\"foo\"", .foo, .{});
- try testStringify("\"bar\"", .bar, .{});
-}
-
-test "stringify tagged unions" {
- const T = union(enum) {
- nothing,
- foo: u32,
- bar: bool,
- };
- try testStringify("{\"nothing\":{}}", T{ .nothing = {} }, .{});
- try testStringify("{\"foo\":42}", T{ .foo = 42 }, .{});
- try testStringify("{\"bar\":true}", T{ .bar = true }, .{});
-}
-
-test "stringify struct" {
- try testStringify("{\"foo\":42}", struct {
- foo: u32,
- }{ .foo = 42 }, .{});
-}
-
-test "emit_strings_as_arrays" {
- // Should only affect string values, not object keys.
- try testStringify("{\"foo\":\"bar\"}", .{ .foo = "bar" }, .{});
- try testStringify("{\"foo\":[98,97,114]}", .{ .foo = "bar" }, .{ .emit_strings_as_arrays = true });
- // Should *not* affect these types:
- try testStringify("\"foo\"", @as(enum { foo, bar }, .foo), .{ .emit_strings_as_arrays = true });
- try testStringify("\"ItBroke\"", error.ItBroke, .{ .emit_strings_as_arrays = true });
- // Should work on these:
- try testStringify("\"bar\"", @Vector(3, u8){ 'b', 'a', 'r' }, .{});
- try testStringify("[98,97,114]", @Vector(3, u8){ 'b', 'a', 'r' }, .{ .emit_strings_as_arrays = true });
- try testStringify("\"bar\"", [3]u8{ 'b', 'a', 'r' }, .{});
- try testStringify("[98,97,114]", [3]u8{ 'b', 'a', 'r' }, .{ .emit_strings_as_arrays = true });
-}
-
-test "stringify struct with indentation" {
- try testStringify(
- \\{
- \\ "foo": 42,
- \\ "bar": [
- \\ 1,
- \\ 2,
- \\ 3
- \\ ]
- \\}
- ,
- struct {
- foo: u32,
- bar: [3]u32,
- }{
- .foo = 42,
- .bar = .{ 1, 2, 3 },
- },
- .{ .whitespace = .indent_4 },
- );
- try testStringify(
- "{\n\t\"foo\": 42,\n\t\"bar\": [\n\t\t1,\n\t\t2,\n\t\t3\n\t]\n}",
- struct {
- foo: u32,
- bar: [3]u32,
- }{
- .foo = 42,
- .bar = .{ 1, 2, 3 },
- },
- .{ .whitespace = .indent_tab },
- );
- try testStringify(
- \\{"foo":42,"bar":[1,2,3]}
- ,
- struct {
- foo: u32,
- bar: [3]u32,
- }{
- .foo = 42,
- .bar = .{ 1, 2, 3 },
- },
- .{ .whitespace = .minified },
- );
-}
-
-test "stringify struct with void field" {
- try testStringify("{\"foo\":42}", struct {
- foo: u32,
- bar: void = {},
- }{ .foo = 42 }, .{});
-}
-
-test "stringify array of structs" {
- const MyStruct = struct {
- foo: u32,
- };
- try testStringify("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", [_]MyStruct{
- MyStruct{ .foo = 42 },
- MyStruct{ .foo = 100 },
- MyStruct{ .foo = 1000 },
- }, .{});
-}
-
-test "stringify struct with custom stringifier" {
- try testStringify("[\"something special\",42]", struct {
- foo: u32,
- const Self = @This();
- pub fn jsonStringify(value: @This(), jws: anytype) !void {
- _ = value;
- try jws.beginArray();
- try jws.write("something special");
- try jws.write(42);
- try jws.endArray();
- }
- }{ .foo = 42 }, .{});
-}
-
-test "stringify vector" {
- try testStringify("[1,1]", @as(@Vector(2, u32), @splat(1)), .{});
- try testStringify("\"AA\"", @as(@Vector(2, u8), @splat('A')), .{});
- try testStringify("[65,65]", @as(@Vector(2, u8), @splat('A')), .{ .emit_strings_as_arrays = true });
-}
-
-test "stringify tuple" {
- try testStringify("[\"foo\",42]", std.meta.Tuple(&.{ []const u8, usize }){ "foo", 42 }, .{});
-}
-
-fn testStringify(expected: []const u8, value: anytype, options: StringifyOptions) !void {
- const ValidationWriter = struct {
- const Self = @This();
- pub const Writer = std.io.GenericWriter(*Self, Error, write);
- pub const Error = error{
- TooMuchData,
- DifferentData,
- };
-
- expected_remaining: []const u8,
-
- fn init(exp: []const u8) Self {
- return .{ .expected_remaining = exp };
- }
-
- pub fn writer(self: *Self) Writer {
- return .{ .context = self };
- }
-
- fn write(self: *Self, bytes: []const u8) Error!usize {
- if (self.expected_remaining.len < bytes.len) {
- std.debug.print(
- \\====== expected this output: =========
- \\{s}
- \\======== instead found this: =========
- \\{s}
- \\======================================
- , .{
- self.expected_remaining,
- bytes,
- });
- return error.TooMuchData;
- }
- if (!mem.eql(u8, self.expected_remaining[0..bytes.len], bytes)) {
- std.debug.print(
- \\====== expected this output: =========
- \\{s}
- \\======== instead found this: =========
- \\{s}
- \\======================================
- , .{
- self.expected_remaining[0..bytes.len],
- bytes,
- });
- return error.DifferentData;
- }
- self.expected_remaining = self.expected_remaining[bytes.len..];
- return bytes.len;
- }
- };
-
- var vos = ValidationWriter.init(expected);
- try stringifyArbitraryDepth(testing.allocator, value, options, vos.writer());
- if (vos.expected_remaining.len > 0) return error.NotEnoughData;
-
- // Also test with safety disabled.
- try testStringifyMaxDepth(expected, value, options, null);
- try testStringifyArbitraryDepth(expected, value, options);
-}
-
-fn testStringifyMaxDepth(expected: []const u8, value: anytype, options: StringifyOptions, comptime max_depth: ?usize) !void {
- var out_buf: [1024]u8 = undefined;
- var slice_stream = std.io.fixedBufferStream(&out_buf);
- const out = slice_stream.writer();
-
- try stringifyMaxDepth(value, options, out, max_depth);
- const got = slice_stream.getWritten();
-
- try testing.expectEqualStrings(expected, got);
-}
-
-fn testStringifyArbitraryDepth(expected: []const u8, value: anytype, options: StringifyOptions) !void {
- var out_buf: [1024]u8 = undefined;
- var slice_stream = std.io.fixedBufferStream(&out_buf);
- const out = slice_stream.writer();
-
- try stringifyArbitraryDepth(testing.allocator, value, options, out);
- const got = slice_stream.getWritten();
-
- try testing.expectEqualStrings(expected, got);
-}
-
-test "stringify alloc" {
- const allocator = std.testing.allocator;
- const expected =
- \\{"foo":"bar","answer":42,"my_friend":"sammy"}
- ;
- const actual = try stringifyAlloc(allocator, .{ .foo = "bar", .answer = 42, .my_friend = "sammy" }, .{});
- defer allocator.free(actual);
-
- try std.testing.expectEqualStrings(expected, actual);
-}
-
-test "comptime stringify" {
- comptime testStringifyMaxDepth("false", false, .{}, null) catch unreachable;
- comptime testStringifyMaxDepth("false", false, .{}, 0) catch unreachable;
- comptime testStringifyArbitraryDepth("false", false, .{}) catch unreachable;
-
- const MyStruct = struct {
- foo: u32,
- };
- comptime testStringifyMaxDepth("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", [_]MyStruct{
- MyStruct{ .foo = 42 },
- MyStruct{ .foo = 100 },
- MyStruct{ .foo = 1000 },
- }, .{}, null) catch unreachable;
- comptime testStringifyMaxDepth("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", [_]MyStruct{
- MyStruct{ .foo = 42 },
- MyStruct{ .foo = 100 },
- MyStruct{ .foo = 1000 },
- }, .{}, 8) catch unreachable;
-}
-
-test "print" {
- var out_buf: [1024]u8 = undefined;
- var slice_stream = std.io.fixedBufferStream(&out_buf);
- const out = slice_stream.writer();
-
- var w = writeStream(out, .{ .whitespace = .indent_2 });
- defer w.deinit();
-
- try w.beginObject();
- try w.objectField("a");
- try w.print("[ ]", .{});
- try w.objectField("b");
- try w.beginArray();
- try w.print("[{s}] ", .{"[]"});
- try w.print(" {}", .{12345});
- try w.endArray();
- try w.endObject();
-
- const result = slice_stream.getWritten();
- const expected =
- \\{
- \\ "a": [ ],
- \\ "b": [
- \\ [[]] ,
- \\ 12345
- \\ ]
- \\}
- ;
- try std.testing.expectEqualStrings(expected, result);
-}
-
-test "nonportable numbers" {
- try testStringify("9999999999999999", 9999999999999999, .{});
- try testStringify("\"9999999999999999\"", 9999999999999999, .{ .emit_nonportable_numbers_as_strings = true });
-}
-
-test "stringify raw streaming" {
- var out_buf: [1024]u8 = undefined;
- var slice_stream = std.io.fixedBufferStream(&out_buf);
- const out = slice_stream.writer();
-
- {
- var w = writeStream(out, .{ .whitespace = .indent_2 });
- try testRawStreaming(&w, &slice_stream);
- }
-
- {
- var w = writeStreamMaxDepth(out, .{ .whitespace = .indent_2 }, 8);
- try testRawStreaming(&w, &slice_stream);
- }
-
- {
- var w = writeStreamMaxDepth(out, .{ .whitespace = .indent_2 }, null);
- try testRawStreaming(&w, &slice_stream);
- }
-
- {
- var w = writeStreamArbitraryDepth(testing.allocator, out, .{ .whitespace = .indent_2 });
- defer w.deinit();
- try testRawStreaming(&w, &slice_stream);
- }
-}
-
-fn testRawStreaming(w: anytype, slice_stream: anytype) !void {
- slice_stream.reset();
-
- try w.beginObject();
- try w.beginObjectFieldRaw();
- try w.stream.writeAll("\"long");
- try w.stream.writeAll(" key\"");
- w.endObjectFieldRaw();
- try w.beginWriteRaw();
- try w.stream.writeAll("\"long");
- try w.stream.writeAll(" value\"");
- w.endWriteRaw();
- try w.endObject();
-
- const result = slice_stream.getWritten();
- const expected =
- \\{
- \\ "long key": "long value"
- \\}
- ;
- try std.testing.expectEqualStrings(expected, result);
-}
lib/std/json/test.zig
@@ -1,10 +1,9 @@
const std = @import("std");
+const json = std.json;
const testing = std.testing;
const parseFromSlice = @import("./static.zig").parseFromSlice;
-const validate = @import("./scanner.zig").validate;
-const JsonScanner = @import("./scanner.zig").Scanner;
+const Scanner = @import("./Scanner.zig");
const Value = @import("./dynamic.zig").Value;
-const stringifyAlloc = @import("./stringify.zig").stringifyAlloc;
// Support for JSONTestSuite.zig
pub fn ok(s: []const u8) !void {
@@ -20,7 +19,7 @@ pub fn any(s: []const u8) !void {
testHighLevelDynamicParser(s) catch {};
}
fn testLowLevelScanner(s: []const u8) !void {
- var scanner = JsonScanner.initCompleteInput(testing.allocator, s);
+ var scanner = Scanner.initCompleteInput(testing.allocator, s);
defer scanner.deinit();
while (true) {
const token = try scanner.next();
@@ -47,12 +46,12 @@ test "n_object_closed_missing_value" {
}
fn roundTrip(s: []const u8) !void {
- try testing.expect(try validate(testing.allocator, s));
+ try testing.expect(try Scanner.validate(testing.allocator, s));
var parsed = try parseFromSlice(Value, testing.allocator, s, .{});
defer parsed.deinit();
- const rendered = try stringifyAlloc(testing.allocator, parsed.value, .{});
+ const rendered = try json.Stringify.valueAlloc(testing.allocator, parsed.value, .{});
defer testing.allocator.free(rendered);
try testing.expectEqualStrings(s, rendered);
lib/std/json.zig
@@ -10,8 +10,8 @@
//! The high-level `stringify` serializes a Zig or `Value` type into JSON.
const builtin = @import("builtin");
-const testing = @import("std").testing;
-const ArrayList = @import("std").ArrayList;
+const std = @import("std");
+const testing = std.testing;
test Scanner {
var scanner = Scanner.initCompleteInput(testing.allocator, "{\"foo\": 123}\n");
@@ -41,11 +41,13 @@ test Value {
try testing.expectEqualSlices(u8, "goes", parsed.value.object.get("anything").?.string);
}
-test writeStream {
- var out = ArrayList(u8).init(testing.allocator);
+test Stringify {
+ var out: std.io.Writer.Allocating = .init(testing.allocator);
+ var write_stream: Stringify = .{
+ .writer = &out.writer,
+ .options = .{ .whitespace = .indent_2 },
+ };
defer out.deinit();
- var write_stream = writeStream(out.writer(), .{ .whitespace = .indent_2 });
- defer write_stream.deinit();
try write_stream.beginObject();
try write_stream.objectField("foo");
try write_stream.write(123);
@@ -55,16 +57,7 @@ test writeStream {
\\ "foo": 123
\\}
;
- try testing.expectEqualSlices(u8, expected, out.items);
-}
-
-test stringify {
- var out = ArrayList(u8).init(testing.allocator);
- defer out.deinit();
-
- const T = struct { a: i32, b: []const u8 };
- try stringify(T{ .a = 123, .b = "xy" }, .{}, out.writer());
- try testing.expectEqualSlices(u8, "{\"a\":123,\"b\":\"xy\"}", out.items);
+ try testing.expectEqualSlices(u8, expected, out.getWritten());
}
pub const ObjectMap = @import("json/dynamic.zig").ObjectMap;
@@ -73,18 +66,18 @@ pub const Value = @import("json/dynamic.zig").Value;
pub const ArrayHashMap = @import("json/hashmap.zig").ArrayHashMap;
-pub const validate = @import("json/scanner.zig").validate;
-pub const Error = @import("json/scanner.zig").Error;
-pub const reader = @import("json/scanner.zig").reader;
-pub const default_buffer_size = @import("json/scanner.zig").default_buffer_size;
-pub const Token = @import("json/scanner.zig").Token;
-pub const TokenType = @import("json/scanner.zig").TokenType;
-pub const Diagnostics = @import("json/scanner.zig").Diagnostics;
-pub const AllocWhen = @import("json/scanner.zig").AllocWhen;
-pub const default_max_value_len = @import("json/scanner.zig").default_max_value_len;
-pub const Reader = @import("json/scanner.zig").Reader;
-pub const Scanner = @import("json/scanner.zig").Scanner;
-pub const isNumberFormattedLikeAnInteger = @import("json/scanner.zig").isNumberFormattedLikeAnInteger;
+pub const Scanner = @import("json/Scanner.zig");
+pub const validate = Scanner.validate;
+pub const Error = Scanner.Error;
+pub const reader = Scanner.reader;
+pub const default_buffer_size = Scanner.default_buffer_size;
+pub const Token = Scanner.Token;
+pub const TokenType = Scanner.TokenType;
+pub const Diagnostics = Scanner.Diagnostics;
+pub const AllocWhen = Scanner.AllocWhen;
+pub const default_max_value_len = Scanner.default_max_value_len;
+pub const Reader = Scanner.Reader;
+pub const isNumberFormattedLikeAnInteger = Scanner.isNumberFormattedLikeAnInteger;
pub const ParseOptions = @import("json/static.zig").ParseOptions;
pub const Parsed = @import("json/static.zig").Parsed;
@@ -99,27 +92,49 @@ pub const innerParseFromValue = @import("json/static.zig").innerParseFromValue;
pub const ParseError = @import("json/static.zig").ParseError;
pub const ParseFromValueError = @import("json/static.zig").ParseFromValueError;
-pub const StringifyOptions = @import("json/stringify.zig").StringifyOptions;
-pub const stringify = @import("json/stringify.zig").stringify;
-pub const stringifyMaxDepth = @import("json/stringify.zig").stringifyMaxDepth;
-pub const stringifyArbitraryDepth = @import("json/stringify.zig").stringifyArbitraryDepth;
-pub const stringifyAlloc = @import("json/stringify.zig").stringifyAlloc;
-pub const writeStream = @import("json/stringify.zig").writeStream;
-pub const writeStreamMaxDepth = @import("json/stringify.zig").writeStreamMaxDepth;
-pub const writeStreamArbitraryDepth = @import("json/stringify.zig").writeStreamArbitraryDepth;
-pub const WriteStream = @import("json/stringify.zig").WriteStream;
-pub const encodeJsonString = @import("json/stringify.zig").encodeJsonString;
-pub const encodeJsonStringChars = @import("json/stringify.zig").encodeJsonStringChars;
-
-pub const Formatter = @import("json/fmt.zig").Formatter;
-pub const fmt = @import("json/fmt.zig").fmt;
+pub const Stringify = @import("json/Stringify.zig");
+
+/// Returns a formatter that formats the given value using stringify.
+pub fn fmt(value: anytype, options: Stringify.Options) Formatter(@TypeOf(value)) {
+ return Formatter(@TypeOf(value)){ .value = value, .options = options };
+}
+
+test fmt {
+ const expectFmt = std.testing.expectFmt;
+ try expectFmt("123", "{f}", .{fmt(@as(u32, 123), .{})});
+ try expectFmt(
+ \\{"num":927,"msg":"hello","sub":{"mybool":true}}
+ , "{f}", .{fmt(struct {
+ num: u32,
+ msg: []const u8,
+ sub: struct {
+ mybool: bool,
+ },
+ }{
+ .num = 927,
+ .msg = "hello",
+ .sub = .{ .mybool = true },
+ }, .{})});
+}
+
+/// Formats the given value using stringify.
+pub fn Formatter(comptime T: type) type {
+ return struct {
+ value: T,
+ options: Stringify.Options,
+
+ pub fn format(self: @This(), writer: *std.Io.Writer) std.Io.Writer.Error!void {
+ try Stringify.value(self.value, self.options, writer);
+ }
+ };
+}
test {
_ = @import("json/test.zig");
- _ = @import("json/scanner.zig");
+ _ = Scanner;
_ = @import("json/dynamic.zig");
_ = @import("json/hashmap.zig");
_ = @import("json/static.zig");
- _ = @import("json/stringify.zig");
+ _ = Stringify;
_ = @import("json/JSONTestSuite_test.zig");
}