master
  1const std = @import("std");
  2const Scanner = @import("Scanner.zig");
  3const Token = Scanner.Token;
  4const TokenType = Scanner.TokenType;
  5const Diagnostics = Scanner.Diagnostics;
  6const Error = Scanner.Error;
  7const validate = Scanner.validate;
  8const isNumberFormattedLikeAnInteger = Scanner.isNumberFormattedLikeAnInteger;
  9
  // Sample JSON document shared by several tests in this file. It contains
  // nested objects, string and number members, a `false` literal and an array
  // of integers, and spans 14 lines (the diagnostics test checks that count).
  10const example_document_str =
  11    \\{
  12    \\  "Image": {
  13    \\      "Width":  800,
  14    \\      "Height": 600,
  15    \\      "Title":  "View from 15th Floor",
  16    \\      "Thumbnail": {
  17    \\          "Url":    "http://www.example.com/image/481989943",
  18    \\          "Height": 125,
  19    \\          "Width":  100
  20    \\      },
  21    \\      "Animated" : false,
  22    \\      "IDs": [116, 943, 234, 38793]
  23    \\    }
  24    \\}
  25;
 26
 27fn expectNext(scanner_or_reader: anytype, expected_token: Token) !void {
 28    return expectEqualTokens(expected_token, try scanner_or_reader.next());
 29}
 30
 31fn expectPeekNext(scanner_or_reader: anytype, expected_token_type: TokenType, expected_token: Token) !void {
 32    try std.testing.expectEqual(expected_token_type, try scanner_or_reader.peekNextTokenType());
 33    try expectEqualTokens(expected_token, try scanner_or_reader.next());
 34}
 35
 36test "token" {
 37    var scanner = Scanner.initCompleteInput(std.testing.allocator, example_document_str);
 38    defer scanner.deinit();
 39
 40    try expectNext(&scanner, .object_begin);
 41    try expectNext(&scanner, Token{ .string = "Image" });
 42    try expectNext(&scanner, .object_begin);
 43    try expectNext(&scanner, Token{ .string = "Width" });
 44    try expectNext(&scanner, Token{ .number = "800" });
 45    try expectNext(&scanner, Token{ .string = "Height" });
 46    try expectNext(&scanner, Token{ .number = "600" });
 47    try expectNext(&scanner, Token{ .string = "Title" });
 48    try expectNext(&scanner, Token{ .string = "View from 15th Floor" });
 49    try expectNext(&scanner, Token{ .string = "Thumbnail" });
 50    try expectNext(&scanner, .object_begin);
 51    try expectNext(&scanner, Token{ .string = "Url" });
 52    try expectNext(&scanner, Token{ .string = "http://www.example.com/image/481989943" });
 53    try expectNext(&scanner, Token{ .string = "Height" });
 54    try expectNext(&scanner, Token{ .number = "125" });
 55    try expectNext(&scanner, Token{ .string = "Width" });
 56    try expectNext(&scanner, Token{ .number = "100" });
 57    try expectNext(&scanner, .object_end);
 58    try expectNext(&scanner, Token{ .string = "Animated" });
 59    try expectNext(&scanner, .false);
 60    try expectNext(&scanner, Token{ .string = "IDs" });
 61    try expectNext(&scanner, .array_begin);
 62    try expectNext(&scanner, Token{ .number = "116" });
 63    try expectNext(&scanner, Token{ .number = "943" });
 64    try expectNext(&scanner, Token{ .number = "234" });
 65    try expectNext(&scanner, Token{ .number = "38793" });
 66    try expectNext(&scanner, .array_end);
 67    try expectNext(&scanner, .object_end);
 68    try expectNext(&scanner, .object_end);
 69    try expectNext(&scanner, .end_of_document);
 70}
 71
  // JSON array holding the values walked by `testAllTypes`: an empty string, a
  // string with an escape, three number spellings, the literals
  // true/false/null, a nested object and an empty array.
  72const all_types_test_case =
  73    \\[
  74    \\  "", "a\nb",
  75    \\  0, 0.0, -1.1e-1,
  76    \\  true, false, null,
  77    \\  {"a": {}},
  78    \\  []
  79    \\]
  80;
 81
 82fn testAllTypes(source: anytype, large_buffer: bool) !void {
 83    try expectPeekNext(source, .array_begin, .array_begin);
 84    try expectPeekNext(source, .string, Token{ .string = "" });
 85    try expectPeekNext(source, .string, Token{ .partial_string = "a" });
 86    try expectPeekNext(source, .string, Token{ .partial_string_escaped_1 = "\n".* });
 87    if (large_buffer) {
 88        try expectPeekNext(source, .string, Token{ .string = "b" });
 89    } else {
 90        try expectPeekNext(source, .string, Token{ .partial_string = "b" });
 91        try expectPeekNext(source, .string, Token{ .string = "" });
 92    }
 93    if (large_buffer) {
 94        try expectPeekNext(source, .number, Token{ .number = "0" });
 95    } else {
 96        try expectPeekNext(source, .number, Token{ .partial_number = "0" });
 97        try expectPeekNext(source, .number, Token{ .number = "" });
 98    }
 99    if (large_buffer) {
100        try expectPeekNext(source, .number, Token{ .number = "0.0" });
101    } else {
102        try expectPeekNext(source, .number, Token{ .partial_number = "0" });
103        try expectPeekNext(source, .number, Token{ .partial_number = "." });
104        try expectPeekNext(source, .number, Token{ .partial_number = "0" });
105        try expectPeekNext(source, .number, Token{ .number = "" });
106    }
107    if (large_buffer) {
108        try expectPeekNext(source, .number, Token{ .number = "-1.1e-1" });
109    } else {
110        try expectPeekNext(source, .number, Token{ .partial_number = "-" });
111        try expectPeekNext(source, .number, Token{ .partial_number = "1" });
112        try expectPeekNext(source, .number, Token{ .partial_number = "." });
113        try expectPeekNext(source, .number, Token{ .partial_number = "1" });
114        try expectPeekNext(source, .number, Token{ .partial_number = "e" });
115        try expectPeekNext(source, .number, Token{ .partial_number = "-" });
116        try expectPeekNext(source, .number, Token{ .partial_number = "1" });
117        try expectPeekNext(source, .number, Token{ .number = "" });
118    }
119    try expectPeekNext(source, .true, .true);
120    try expectPeekNext(source, .false, .false);
121    try expectPeekNext(source, .null, .null);
122    try expectPeekNext(source, .object_begin, .object_begin);
123    if (large_buffer) {
124        try expectPeekNext(source, .string, Token{ .string = "a" });
125    } else {
126        try expectPeekNext(source, .string, Token{ .partial_string = "a" });
127        try expectPeekNext(source, .string, Token{ .string = "" });
128    }
129    try expectPeekNext(source, .object_begin, .object_begin);
130    try expectPeekNext(source, .object_end, .object_end);
131    try expectPeekNext(source, .object_end, .object_end);
132    try expectPeekNext(source, .array_begin, .array_begin);
133    try expectPeekNext(source, .array_end, .array_end);
134    try expectPeekNext(source, .array_end, .array_end);
135    try expectPeekNext(source, .end_of_document, .end_of_document);
136}
137
138test "peek all types" {
139    var scanner = Scanner.initCompleteInput(std.testing.allocator, all_types_test_case);
140    defer scanner.deinit();
141    try testAllTypes(&scanner, true);
142
143    var stream: std.Io.Reader = .fixed(all_types_test_case);
144    var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream);
145    defer json_reader.deinit();
146    try testAllTypes(&json_reader, true);
147
148    var tiny_buffer: [1]u8 = undefined;
149    var tiny_stream: std.testing.Reader = .init(&tiny_buffer, &.{.{ .buffer = all_types_test_case }});
150    tiny_stream.artificial_limit = .limited(1);
151    var tiny_json_reader: Scanner.Reader = .init(std.testing.allocator, &tiny_stream.interface);
152    defer tiny_json_reader.deinit();
153    try testAllTypes(&tiny_json_reader, false);
154}
155
156test "token mismatched close" {
157    var scanner = Scanner.initCompleteInput(std.testing.allocator, "[102, 111, 111 }");
158    defer scanner.deinit();
159    try expectNext(&scanner, .array_begin);
160    try expectNext(&scanner, Token{ .number = "102" });
161    try expectNext(&scanner, Token{ .number = "111" });
162    try expectNext(&scanner, Token{ .number = "111" });
163    try std.testing.expectError(error.SyntaxError, scanner.next());
164}
165
166test "token premature object close" {
167    var scanner = Scanner.initCompleteInput(std.testing.allocator, "{ \"key\": }");
168    defer scanner.deinit();
169    try expectNext(&scanner, .object_begin);
170    try expectNext(&scanner, Token{ .string = "key" });
171    try std.testing.expectError(error.SyntaxError, scanner.next());
172}
173
174test "Scanner basic" {
175    var scanner = Scanner.initCompleteInput(std.testing.allocator, example_document_str);
176    defer scanner.deinit();
177
178    while (true) {
179        const token = try scanner.next();
180        if (token == .end_of_document) break;
181    }
182}
183
184test "Scanner.Reader basic" {
185    var stream: std.Io.Reader = .fixed(example_document_str);
186
187    var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream);
188    defer json_reader.deinit();
189
190    while (true) {
191        const token = try json_reader.next();
192        if (token == .end_of_document) break;
193    }
194}
195
196const number_test_stems = .{
197    .{ "", "-" },
198    .{ "0", "1", "10", "9999999999999999999999999" },
199    .{ "", ".0", ".999999999999999999999999" },
200    .{ "", "e0", "E0", "e+0", "e-0", "e9999999999999999999999999999" },
201};
202const number_test_items = blk: {
203    var ret: []const []const u8 = &[_][]const u8{};
204    for (number_test_stems[0]) |s0| {
205        for (number_test_stems[1]) |s1| {
206            for (number_test_stems[2]) |s2| {
207                for (number_test_stems[3]) |s3| {
208                    ret = ret ++ &[_][]const u8{s0 ++ s1 ++ s2 ++ s3};
209                }
210            }
211        }
212    }
213    break :blk ret;
214};
215
216test "numbers" {
217    for (number_test_items) |number_str| {
218        var scanner = Scanner.initCompleteInput(std.testing.allocator, number_str);
219        defer scanner.deinit();
220
221        const token = try scanner.next();
222        const value = token.number; // assert this is a number
223        try std.testing.expectEqualStrings(number_str, value);
224
225        try std.testing.expectEqual(Token.end_of_document, try scanner.next());
226    }
227}
228
229const string_test_cases = .{
230    // The left is JSON without the "quotes".
231    // The right is the expected unescaped content.
232    .{ "", "" },
233    .{ "\\\\", "\\" },
234    .{ "a\\\\b", "a\\b" },
235    .{ "a\\\"b", "a\"b" },
236    .{ "\\n", "\n" },
237    .{ "\\u000a", "\n" },
238    .{ "𝄞", "\u{1D11E}" },
239    .{ "\\uD834\\uDD1E", "\u{1D11E}" },
240    .{ "\\uD87F\\uDFFE", "\u{2FFFE}" },
241    .{ "\\uff20", "" },
242};
243
244test "strings" {
245    inline for (string_test_cases) |tuple| {
246        var stream: std.Io.Reader = .fixed("\"" ++ tuple[0] ++ "\"");
247        var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
248        defer arena.deinit();
249        var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream);
250        defer json_reader.deinit();
251
252        const token = try json_reader.nextAlloc(arena.allocator(), .alloc_if_needed);
253        const value = switch (token) {
254            .string => |value| value,
255            .allocated_string => |value| value,
256            else => return error.ExpectedString,
257        };
258        try std.testing.expectEqualStrings(tuple[1], value);
259
260        try std.testing.expectEqual(Token.end_of_document, try json_reader.next());
261    }
262}
263
264const nesting_test_cases = .{
265    .{ null, "[]" },
266    .{ null, "{}" },
267    .{ error.SyntaxError, "[}" },
268    .{ error.SyntaxError, "{]" },
269    .{ null, "[" ** 1000 ++ "]" ** 1000 },
270    .{ null, "{\"\":" ** 1000 ++ "0" ++ "}" ** 1000 },
271    .{ error.SyntaxError, "[" ** 1000 ++ "]" ** 999 ++ "}" },
272    .{ error.SyntaxError, "{\"\":" ** 1000 ++ "0" ++ "}" ** 999 ++ "]" },
273    .{ error.SyntaxError, "[" ** 1000 ++ "]" ** 1001 },
274    .{ error.SyntaxError, "{\"\":" ** 1000 ++ "0" ++ "}" ** 1001 },
275    .{ error.UnexpectedEndOfInput, "[" ** 1000 ++ "]" ** 999 },
276    .{ error.UnexpectedEndOfInput, "{\"\":" ** 1000 ++ "0" ++ "}" ** 999 },
277};
278
279test "nesting" {
280    inline for (nesting_test_cases) |tuple| {
281        const maybe_error = tuple[0];
282        const document_str = tuple[1];
283
284        expectMaybeError(document_str, maybe_error) catch |err| {
285            std.debug.print("in json document: {s}\n", .{document_str});
286            return err;
287        };
288    }
289}
290
291fn expectMaybeError(document_str: []const u8, maybe_error: ?Error) !void {
292    var scanner = Scanner.initCompleteInput(std.testing.allocator, document_str);
293    defer scanner.deinit();
294
295    while (true) {
296        const token = scanner.next() catch |err| {
297            if (maybe_error) |expected_err| {
298                if (err == expected_err) return;
299            }
300            return err;
301        };
302        if (token == .end_of_document) break;
303    }
304    if (maybe_error != null) return error.ExpectedError;
305}
306
307fn expectEqualTokens(expected_token: Token, actual_token: Token) !void {
308    try std.testing.expectEqual(std.meta.activeTag(expected_token), std.meta.activeTag(actual_token));
309    switch (expected_token) {
310        .number => |expected_value| {
311            try std.testing.expectEqualStrings(expected_value, actual_token.number);
312        },
313        .allocated_number => |expected_value| {
314            try std.testing.expectEqualStrings(expected_value, actual_token.allocated_number);
315        },
316        .partial_number => |expected_value| {
317            try std.testing.expectEqualStrings(expected_value, actual_token.partial_number);
318        },
319
320        .string => |expected_value| {
321            try std.testing.expectEqualStrings(expected_value, actual_token.string);
322        },
323        .allocated_string => |expected_value| {
324            try std.testing.expectEqualStrings(expected_value, actual_token.allocated_string);
325        },
326        .partial_string => |expected_value| {
327            try std.testing.expectEqualStrings(expected_value, actual_token.partial_string);
328        },
329        .partial_string_escaped_1 => |expected_value| {
330            try std.testing.expectEqualStrings(&expected_value, &actual_token.partial_string_escaped_1);
331        },
332        .partial_string_escaped_2 => |expected_value| {
333            try std.testing.expectEqualStrings(&expected_value, &actual_token.partial_string_escaped_2);
334        },
335        .partial_string_escaped_3 => |expected_value| {
336            try std.testing.expectEqualStrings(&expected_value, &actual_token.partial_string_escaped_3);
337        },
338        .partial_string_escaped_4 => |expected_value| {
339            try std.testing.expectEqualStrings(&expected_value, &actual_token.partial_string_escaped_4);
340        },
341
342        .object_begin,
343        .object_end,
344        .array_begin,
345        .array_end,
346        .true,
347        .false,
348        .null,
349        .end_of_document,
350        => {},
351    }
352}
353
354fn testTinyBufferSize(document_str: []const u8) !void {
355    var tiny_stream: std.Io.Reader = .fixed(document_str);
356    var normal_stream: std.Io.Reader = .fixed(document_str);
357
358    var tiny_json_reader: Scanner.Reader = .init(std.testing.allocator, &tiny_stream);
359    defer tiny_json_reader.deinit();
360    var normal_json_reader: Scanner.Reader = .init(std.testing.allocator, &normal_stream);
361    defer normal_json_reader.deinit();
362
363    expectEqualStreamOfTokens(&normal_json_reader, &tiny_json_reader) catch |err| {
364        std.debug.print("in json document: {s}\n", .{document_str});
365        return err;
366    };
367}
368fn expectEqualStreamOfTokens(control_json_reader: anytype, test_json_reader: anytype) !void {
369    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
370    defer arena.deinit();
371    while (true) {
372        const control_token = try control_json_reader.nextAlloc(arena.allocator(), .alloc_always);
373        const test_token = try test_json_reader.nextAlloc(arena.allocator(), .alloc_always);
374        try expectEqualTokens(control_token, test_token);
375        if (control_token == .end_of_document) break;
376        _ = arena.reset(.retain_capacity);
377    }
378}
379
380test "BufferUnderrun" {
381    try testTinyBufferSize(example_document_str);
382    for (number_test_items) |number_str| {
383        try testTinyBufferSize(number_str);
384    }
385    inline for (string_test_cases) |tuple| {
386        try testTinyBufferSize("\"" ++ tuple[0] ++ "\"");
387    }
388}
389
390test "validate" {
391    try std.testing.expectEqual(true, try validate(std.testing.allocator, "{}"));
392    try std.testing.expectEqual(true, try validate(std.testing.allocator, "[]"));
393    try std.testing.expectEqual(false, try validate(std.testing.allocator, "[{[[[[{}]]]]}]"));
394    try std.testing.expectEqual(false, try validate(std.testing.allocator, "{]"));
395    try std.testing.expectEqual(false, try validate(std.testing.allocator, "[}"));
396    try std.testing.expectEqual(false, try validate(std.testing.allocator, "{{{{[]}}}]"));
397}
398
399fn testSkipValue(s: []const u8) !void {
400    var scanner = Scanner.initCompleteInput(std.testing.allocator, s);
401    defer scanner.deinit();
402    try scanner.skipValue();
403    try expectEqualTokens(.end_of_document, try scanner.next());
404
405    var stream: std.Io.Reader = .fixed(s);
406    var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream);
407    defer json_reader.deinit();
408    try json_reader.skipValue();
409    try expectEqualTokens(.end_of_document, try json_reader.next());
410}
411
412test "skipValue" {
413    try testSkipValue("false");
414    try testSkipValue("true");
415    try testSkipValue("null");
416    try testSkipValue("42");
417    try testSkipValue("42.0");
418    try testSkipValue("\"foo\"");
419    try testSkipValue("[101, 111, 121]");
420    try testSkipValue("{}");
421    try testSkipValue("{\"foo\": \"bar\\nbaz\"}");
422
423    // An absurd number of nestings
424    const nestings = 1000;
425    try testSkipValue("[" ** nestings ++ "]" ** nestings);
426
427    // Would a number token cause problems in a deeply-nested array?
428    try testSkipValue("[" ** nestings ++ "0.118, 999, 881.99, 911.9, 725, 3" ++ "]" ** nestings);
429
430    // Mismatched brace/square bracket
431    try std.testing.expectError(error.SyntaxError, testSkipValue("[102, 111, 111}"));
432}
433
434fn testDiagnosticsFromSource(expected_error: ?anyerror, line: u64, col: u64, byte_offset: u64, source: anytype) !void {
435    var diagnostics = Diagnostics{};
436    source.enableDiagnostics(&diagnostics);
437
438    if (expected_error) |expected_err| {
439        try std.testing.expectError(expected_err, source.skipValue());
440    } else {
441        try source.skipValue();
442        try std.testing.expectEqual(Token.end_of_document, try source.next());
443    }
444    try std.testing.expectEqual(line, diagnostics.getLine());
445    try std.testing.expectEqual(col, diagnostics.getColumn());
446    try std.testing.expectEqual(byte_offset, diagnostics.getByteOffset());
447}
448fn testDiagnostics(expected_error: ?anyerror, line: u64, col: u64, byte_offset: u64, s: []const u8) !void {
449    var scanner = Scanner.initCompleteInput(std.testing.allocator, s);
450    defer scanner.deinit();
451    try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &scanner);
452
453    var tiny_stream: std.Io.Reader = .fixed(s);
454    var tiny_json_reader: Scanner.Reader = .init(std.testing.allocator, &tiny_stream);
455    defer tiny_json_reader.deinit();
456    try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &tiny_json_reader);
457
458    var medium_stream: std.Io.Reader = .fixed(s);
459    var medium_json_reader: Scanner.Reader = .init(std.testing.allocator, &medium_stream);
460    defer medium_json_reader.deinit();
461    try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &medium_json_reader);
462}
463test "enableDiagnostics" {
464    try testDiagnostics(error.UnexpectedEndOfInput, 1, 1, 0, "");
465    try testDiagnostics(null, 1, 3, 2, "[]");
466    try testDiagnostics(null, 2, 2, 3, "[\n]");
467    try testDiagnostics(null, 14, 2, example_document_str.len, example_document_str);
468
469    try testDiagnostics(error.SyntaxError, 3, 1, 25,
470        \\{
471        \\  "common": "mistake",
472        \\}
473    );
474
475    inline for ([_]comptime_int{ 5, 6, 7, 99 }) |reps| {
476        // The error happens 1 byte before the end.
477        const s = "[" ** reps ++ "}";
478        try testDiagnostics(error.SyntaxError, 1, s.len, s.len - 1, s);
479    }
480}
481
482test isNumberFormattedLikeAnInteger {
483    try std.testing.expect(isNumberFormattedLikeAnInteger("0"));
484    try std.testing.expect(isNumberFormattedLikeAnInteger("1"));
485    try std.testing.expect(isNumberFormattedLikeAnInteger("123"));
486    try std.testing.expect(!isNumberFormattedLikeAnInteger("-0"));
487    try std.testing.expect(!isNumberFormattedLikeAnInteger("0.0"));
488    try std.testing.expect(!isNumberFormattedLikeAnInteger("1.0"));
489    try std.testing.expect(!isNumberFormattedLikeAnInteger("1.23"));
490    try std.testing.expect(!isNumberFormattedLikeAnInteger("1e10"));
491    try std.testing.expect(!isNumberFormattedLikeAnInteger("1E10"));
492}
493
494test "fuzz" {
495    try std.testing.fuzz({}, fuzzTestOne, .{});
496}
497
498fn fuzzTestOne(_: void, input: []const u8) !void {
499    var buf: [16384]u8 = undefined;
500    var fba: std.heap.FixedBufferAllocator = .init(&buf);
501
502    var scanner = Scanner.initCompleteInput(fba.allocator(), input);
503    // Property: There are at most input.len tokens
504    var tokens: usize = 0;
505    while ((scanner.next() catch return) != .end_of_document) {
506        tokens += 1;
507        if (tokens > input.len) return error.Overflow;
508    }
509}