master
1const std = @import("std");
2const Scanner = @import("Scanner.zig");
3const Token = Scanner.Token;
4const TokenType = Scanner.TokenType;
5const Diagnostics = Scanner.Diagnostics;
6const Error = Scanner.Error;
7const validate = Scanner.validate;
8const isNumberFormattedLikeAnInteger = Scanner.isNumberFormattedLikeAnInteger;
9
// Sample JSON document reused by several tests in this file. It is a nested
// object holding numbers, strings, a boolean and an array of numbers, and it
// spans 14 lines (the "enableDiagnostics" test relies on that line count).
const example_document_str =
    \\{
    \\ "Image": {
    \\ "Width": 800,
    \\ "Height": 600,
    \\ "Title": "View from 15th Floor",
    \\ "Thumbnail": {
    \\ "Url": "http://www.example.com/image/481989943",
    \\ "Height": 125,
    \\ "Width": 100
    \\ },
    \\ "Animated" : false,
    \\ "IDs": [116, 943, 234, 38793]
    \\ }
    \\}
;
26
/// Pulls the next token out of `scanner_or_reader` and checks that it matches
/// `expected_token` (tag and, where present, payload).
fn expectNext(scanner_or_reader: anytype, expected_token: Token) !void {
    const actual_token = try scanner_or_reader.next();
    try expectEqualTokens(expected_token, actual_token);
}
30
/// Checks that peeking reports `expected_token_type`, then consumes the next
/// token and checks it against `expected_token`.
fn expectPeekNext(scanner_or_reader: anytype, expected_token_type: TokenType, expected_token: Token) !void {
    const peeked_type = try scanner_or_reader.peekNextTokenType();
    try std.testing.expectEqual(expected_token_type, peeked_type);

    const actual_token = try scanner_or_reader.next();
    return expectEqualTokens(expected_token, actual_token);
}
35
test "token" {
    var scanner = Scanner.initCompleteInput(std.testing.allocator, example_document_str);
    defer scanner.deinit();

    // The full token stream expected for `example_document_str`, in order.
    const expected_tokens = [_]Token{
        .object_begin,
        .{ .string = "Image" },
        .object_begin,
        .{ .string = "Width" },
        .{ .number = "800" },
        .{ .string = "Height" },
        .{ .number = "600" },
        .{ .string = "Title" },
        .{ .string = "View from 15th Floor" },
        .{ .string = "Thumbnail" },
        .object_begin,
        .{ .string = "Url" },
        .{ .string = "http://www.example.com/image/481989943" },
        .{ .string = "Height" },
        .{ .number = "125" },
        .{ .string = "Width" },
        .{ .number = "100" },
        .object_end,
        .{ .string = "Animated" },
        .false,
        .{ .string = "IDs" },
        .array_begin,
        .{ .number = "116" },
        .{ .number = "943" },
        .{ .number = "234" },
        .{ .number = "38793" },
        .array_end,
        .object_end,
        .object_end,
        .end_of_document,
    };
    for (expected_tokens) |expected| {
        try expectNext(&scanner, expected);
    }
}
71
// JSON array consumed by `testAllTypes`: two strings (one containing a `\n`
// escape), three numbers, the literals true/false/null, a nested object and
// an empty array.
const all_types_test_case =
    \\[
    \\ "", "a\nb",
    \\ 0, 0.0, -1.1e-1,
    \\ true, false, null,
    \\ {"a": {}},
    \\ []
    \\]
;
81
/// Walks `source` over `all_types_test_case`, checking the peeked token type
/// and the token itself at every step.
///
/// With `large_buffer` set, values are expected as single whole tokens.
/// Otherwise they are expected as a run of one-byte partial tokens followed by
/// an empty terminating token.
fn testAllTypes(source: anytype, large_buffer: bool) !void {
    try expectPeekNext(source, .array_begin, .array_begin);

    // "" followed by "a\nb"; the escape always splits the second string.
    try expectPeekNext(source, .string, Token{ .string = "" });
    try expectPeekNext(source, .string, Token{ .partial_string = "a" });
    try expectPeekNext(source, .string, Token{ .partial_string_escaped_1 = "\n".* });
    if (large_buffer) {
        try expectPeekNext(source, .string, Token{ .string = "b" });
    } else {
        try expectPeekNext(source, .string, Token{ .partial_string = "b" });
        try expectPeekNext(source, .string, Token{ .string = "" });
    }

    // 0, 0.0 and -1.1e-1: either whole, or one partial token per byte.
    const numbers = [_]struct { whole: []const u8, pieces: []const []const u8 }{
        .{ .whole = "0", .pieces = &.{"0"} },
        .{ .whole = "0.0", .pieces = &.{ "0", ".", "0" } },
        .{ .whole = "-1.1e-1", .pieces = &.{ "-", "1", ".", "1", "e", "-", "1" } },
    };
    for (numbers) |number| {
        if (large_buffer) {
            try expectPeekNext(source, .number, Token{ .number = number.whole });
        } else {
            for (number.pieces) |piece| {
                try expectPeekNext(source, .number, Token{ .partial_number = piece });
            }
            try expectPeekNext(source, .number, Token{ .number = "" });
        }
    }

    try expectPeekNext(source, .true, .true);
    try expectPeekNext(source, .false, .false);
    try expectPeekNext(source, .null, .null);

    // {"a": {}}
    try expectPeekNext(source, .object_begin, .object_begin);
    if (large_buffer) {
        try expectPeekNext(source, .string, Token{ .string = "a" });
    } else {
        try expectPeekNext(source, .string, Token{ .partial_string = "a" });
        try expectPeekNext(source, .string, Token{ .string = "" });
    }
    try expectPeekNext(source, .object_begin, .object_begin);
    try expectPeekNext(source, .object_end, .object_end);
    try expectPeekNext(source, .object_end, .object_end);

    // [] and the close of the outer array.
    try expectPeekNext(source, .array_begin, .array_begin);
    try expectPeekNext(source, .array_end, .array_end);
    try expectPeekNext(source, .array_end, .array_end);

    try expectPeekNext(source, .end_of_document, .end_of_document);
}
137
test "peek all types" {
    // A bare Scanner given the complete input up front.
    {
        var scanner = Scanner.initCompleteInput(std.testing.allocator, all_types_test_case);
        defer scanner.deinit();
        try testAllTypes(&scanner, true);
    }

    // A Scanner.Reader over a fixed reader holding the whole document.
    {
        var stream: std.Io.Reader = .fixed(all_types_test_case);
        var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream);
        defer json_reader.deinit();
        try testAllTypes(&json_reader, true);
    }

    // A Scanner.Reader fed through a one-byte buffer, limited to one byte per
    // read, so values are expected to arrive as partial tokens.
    {
        var tiny_buffer: [1]u8 = undefined;
        var tiny_stream: std.testing.Reader = .init(&tiny_buffer, &.{.{ .buffer = all_types_test_case }});
        tiny_stream.artificial_limit = .limited(1);
        var tiny_json_reader: Scanner.Reader = .init(std.testing.allocator, &tiny_stream.interface);
        defer tiny_json_reader.deinit();
        try testAllTypes(&tiny_json_reader, false);
    }
}
155
test "token mismatched close" {
    var scanner = Scanner.initCompleteInput(std.testing.allocator, "[102, 111, 111 }");
    defer scanner.deinit();

    try expectNext(&scanner, .array_begin);
    for ([_][]const u8{ "102", "111", "111" }) |digits| {
        try expectNext(&scanner, Token{ .number = digits });
    }
    // The array is closed with `}` instead of `]`.
    const result = scanner.next();
    try std.testing.expectError(error.SyntaxError, result);
}
165
test "token premature object close" {
    const document = "{ \"key\": }";
    var scanner = Scanner.initCompleteInput(std.testing.allocator, document);
    defer scanner.deinit();

    try expectNext(&scanner, .object_begin);
    try expectNext(&scanner, Token{ .string = "key" });
    // The object is closed where the value for "key" should be.
    const result = scanner.next();
    try std.testing.expectError(error.SyntaxError, result);
}
173
test "Scanner basic" {
    var scanner = Scanner.initCompleteInput(std.testing.allocator, example_document_str);
    defer scanner.deinit();

    // Drain every token; any scanner error fails the test.
    while (true) {
        switch (try scanner.next()) {
            .end_of_document => break,
            else => {},
        }
    }
}
183
test "Scanner.Reader basic" {
    var stream: std.Io.Reader = .fixed(example_document_str);
    var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream);
    defer json_reader.deinit();

    // Drain every token; any reader or scanner error fails the test.
    while (true) {
        switch (try json_reader.next()) {
            .end_of_document => break,
            else => {},
        }
    }
}
195
// Building blocks for number literals, one tuple per position:
// sign, integer part, fraction part, exponent part.
const number_test_stems = .{
    .{ "", "-" },
    .{ "0", "1", "10", "9999999999999999999999999" },
    .{ "", ".0", ".999999999999999999999999" },
    .{ "", "e0", "E0", "e+0", "e-0", "e9999999999999999999999999999" },
};

// Every combination of one stem per position, concatenated at comptime.
// The sign varies slowest and the exponent fastest.
const number_test_items = blk: {
    const signs = number_test_stems[0];
    const integers = number_test_stems[1];
    const fractions = number_test_stems[2];
    const exponents = number_test_stems[3];

    var items: []const []const u8 = &[_][]const u8{};
    for (signs) |sign| {
        for (integers) |integer| {
            for (fractions) |fraction| {
                for (exponents) |exponent| {
                    const literal = sign ++ integer ++ fraction ++ exponent;
                    items = items ++ &[_][]const u8{literal};
                }
            }
        }
    }
    break :blk items;
};
215
test "numbers" {
    for (number_test_items) |number_str| {
        var scanner = Scanner.initCompleteInput(std.testing.allocator, number_str);
        defer scanner.deinit();

        // Reading `.number` asserts that the first token is a whole number.
        const first = try scanner.next();
        try std.testing.expectEqualStrings(number_str, first.number);

        // Nothing may follow the number.
        const last = try scanner.next();
        try std.testing.expectEqual(Token.end_of_document, last);
    }
}
228
// Table of string-unescaping cases.
// The left is JSON without the "quotes".
// The right is the expected unescaped content.
const string_test_cases = .{
    .{ "", "" },
    .{ "\\\\", "\\" },
    .{ "a\\\\b", "a\\b" },
    .{ "a\\\"b", "a\"b" },
    .{ "\\n", "\n" },
    .{ "\\u000a", "\n" },
    .{ "𝄞", "\u{1D11E}" },
    .{ "\\uD834\\uDD1E", "\u{1D11E}" },
    .{ "\\uD87F\\uDFFE", "\u{2FFFE}" },
    // \uff20 is U+FF20 (FULLWIDTH COMMERCIAL AT), not the ASCII '@' (U+0040).
    .{ "\\uff20", "\u{FF20}" },
};
243
test "strings" {
    inline for (string_test_cases) |tuple| {
        // Wrap the raw JSON string body in quotes to form a complete document.
        const document = "\"" ++ tuple[0] ++ "\"";
        const expected_text: []const u8 = tuple[1];

        var stream: std.Io.Reader = .fixed(document);
        var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
        defer arena.deinit();
        var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream);
        defer json_reader.deinit();

        // Either string form is acceptable; anything else is a failure.
        const actual_text = switch (try json_reader.nextAlloc(arena.allocator(), .alloc_if_needed)) {
            .string => |borrowed| borrowed,
            .allocated_string => |owned| owned,
            else => return error.ExpectedString,
        };
        try std.testing.expectEqualStrings(expected_text, actual_text);

        const trailing = try json_reader.next();
        try std.testing.expectEqual(Token.end_of_document, trailing);
    }
}
263
// Bracket/brace nesting cases consumed by the "nesting" test.
// The left is the expected outcome of scanning the whole document: null when
// it should scan cleanly, otherwise the error that should be raised.
// The right is the JSON document.
const nesting_test_cases = .{
    .{ null, "[]" },
    .{ null, "{}" },
    .{ error.SyntaxError, "[}" },
    .{ error.SyntaxError, "{]" },
    .{ null, "[" ** 1000 ++ "]" ** 1000 },
    .{ null, "{\"\":" ** 1000 ++ "0" ++ "}" ** 1000 },
    .{ error.SyntaxError, "[" ** 1000 ++ "]" ** 999 ++ "}" },
    .{ error.SyntaxError, "{\"\":" ** 1000 ++ "0" ++ "}" ** 999 ++ "]" },
    .{ error.SyntaxError, "[" ** 1000 ++ "]" ** 1001 },
    .{ error.SyntaxError, "{\"\":" ** 1000 ++ "0" ++ "}" ** 1001 },
    .{ error.UnexpectedEndOfInput, "[" ** 1000 ++ "]" ** 999 },
    .{ error.UnexpectedEndOfInput, "{\"\":" ** 1000 ++ "0" ++ "}" ** 999 },
};
278
test "nesting" {
    inline for (nesting_test_cases) |case| {
        // On failure, print the offending document before propagating.
        expectMaybeError(case[1], case[0]) catch |err| {
            std.debug.print("in json document: {s}\n", .{case[1]});
            return err;
        };
    }
}
290
/// Scans `document_str` to completion.
///
/// When `maybe_error` is null the document must scan without error. Otherwise
/// scanning must fail with exactly that error: any other error is propagated,
/// and reaching the end of the document yields `error.ExpectedError`.
fn expectMaybeError(document_str: []const u8, maybe_error: ?Error) !void {
    var scanner = Scanner.initCompleteInput(std.testing.allocator, document_str);
    defer scanner.deinit();

    while (true) {
        if (scanner.next()) |token| {
            if (token == .end_of_document) break;
        } else |err| {
            // No error was expected, or a different one was: propagate it.
            const expected_err = maybe_error orelse return err;
            if (err != expected_err) return err;
            return;
        }
    }
    if (maybe_error != null) return error.ExpectedError;
}
306
/// Checks that two tokens carry the same tag and, for tags that have one, the
/// same payload bytes. Tags without a payload only need to match.
fn expectEqualTokens(expected_token: Token, actual_token: Token) !void {
    try std.testing.expectEqual(std.meta.activeTag(expected_token), std.meta.activeTag(actual_token));
    switch (expected_token) {
        // Slice payloads: compare the bytes directly.
        inline .number,
        .allocated_number,
        .partial_number,
        .string,
        .allocated_string,
        .partial_string,
        => |expected_value, tag| {
            const actual_value = @field(actual_token, @tagName(tag));
            try std.testing.expectEqualStrings(expected_value, actual_value);
        },

        // Fixed-size array payloads: compare through pointers to the arrays.
        inline .partial_string_escaped_1,
        .partial_string_escaped_2,
        .partial_string_escaped_3,
        .partial_string_escaped_4,
        => |expected_value, tag| {
            const actual_value = @field(actual_token, @tagName(tag));
            try std.testing.expectEqualStrings(&expected_value, &actual_value);
        },

        // No payload: the tag comparison above is the whole check.
        .object_begin,
        .object_end,
        .array_begin,
        .array_end,
        .true,
        .false,
        .null,
        .end_of_document,
        => {},
    }
}
353
/// Scans `document_str` twice, once through a reader that holds the whole
/// document and once through a reader that delivers it one byte at a time,
/// and checks that both produce the same stream of tokens.
///
/// On mismatch the offending document is printed before the error propagates.
fn testTinyBufferSize(document_str: []const u8) !void {
    // One-byte buffer, one byte per read: the same construction the tiny
    // reader in "peek all types" uses. A `.fixed` reader here would make this
    // reader identical to the control below.
    var tiny_buffer: [1]u8 = undefined;
    var tiny_stream: std.testing.Reader = .init(&tiny_buffer, &.{.{ .buffer = document_str }});
    tiny_stream.artificial_limit = .limited(1);
    var normal_stream: std.Io.Reader = .fixed(document_str);

    var tiny_json_reader: Scanner.Reader = .init(std.testing.allocator, &tiny_stream.interface);
    defer tiny_json_reader.deinit();
    var normal_json_reader: Scanner.Reader = .init(std.testing.allocator, &normal_stream);
    defer normal_json_reader.deinit();

    expectEqualStreamOfTokens(&normal_json_reader, &tiny_json_reader) catch |err| {
        std.debug.print("in json document: {s}\n", .{document_str});
        return err;
    };
}
/// Reads tokens from both readers in lockstep with `.alloc_always` and checks
/// each pair for equality, stopping once the control reader reports the end
/// of the document.
fn expectEqualStreamOfTokens(control_json_reader: anytype, test_json_reader: anytype) !void {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const scratch = arena.allocator();

    // Token payloads are only needed for one comparison, so the arena is
    // recycled after every pair except the final one.
    while (true) : (_ = arena.reset(.retain_capacity)) {
        const control_token = try control_json_reader.nextAlloc(scratch, .alloc_always);
        const test_token = try test_json_reader.nextAlloc(scratch, .alloc_always);
        try expectEqualTokens(control_token, test_token);
        if (control_token == .end_of_document) break;
    }
}
379
test "BufferUnderrun" {
    // The shared sample document first.
    try testTinyBufferSize(example_document_str);

    // Then every generated number literal as a standalone document.
    for (number_test_items) |literal| {
        try testTinyBufferSize(literal);
    }

    // Then every string case, wrapped in quotes to form a document.
    inline for (string_test_cases) |case| {
        const document = "\"" ++ case[0] ++ "\"";
        try testTinyBufferSize(document);
    }
}
389
test "validate" {
    const Case = struct { valid: bool, document: []const u8 };
    const cases = [_]Case{
        .{ .valid = true, .document = "{}" },
        .{ .valid = true, .document = "[]" },
        .{ .valid = false, .document = "[{[[[[{}]]]]}]" },
        .{ .valid = false, .document = "{]" },
        .{ .valid = false, .document = "[}" },
        .{ .valid = false, .document = "{{{{[]}}}]" },
    };
    for (cases) |case| {
        const actual = try validate(std.testing.allocator, case.document);
        try std.testing.expectEqual(case.valid, actual);
    }
}
398
/// Checks that `skipValue` consumes the single value in `s`, leaving only the
/// end of the document, for both a bare Scanner and a Scanner.Reader.
fn testSkipValue(s: []const u8) !void {
    {
        var scanner = Scanner.initCompleteInput(std.testing.allocator, s);
        defer scanner.deinit();
        try scanner.skipValue();
        const after = try scanner.next();
        try expectEqualTokens(.end_of_document, after);
    }
    {
        var stream: std.Io.Reader = .fixed(s);
        var json_reader: Scanner.Reader = .init(std.testing.allocator, &stream);
        defer json_reader.deinit();
        try json_reader.skipValue();
        const after = try json_reader.next();
        try expectEqualTokens(.end_of_document, after);
    }
}
411
test "skipValue" {
    // One document per kind of value.
    const simple_documents = [_][]const u8{
        "false",
        "true",
        "null",
        "42",
        "42.0",
        "\"foo\"",
        "[101, 111, 121]",
        "{}",
        "{\"foo\": \"bar\\nbaz\"}",
    };
    for (simple_documents) |document| {
        try testSkipValue(document);
    }

    // An absurd number of nestings
    const nestings = 1000;
    const openers = "[" ** nestings;
    const closers = "]" ** nestings;
    try testSkipValue(openers ++ closers);

    // Would a number token cause problems in a deeply-nested array?
    try testSkipValue(openers ++ "0.118, 999, 881.99, 911.9, 725, 3" ++ closers);

    // Mismatched brace/square bracket
    try std.testing.expectError(error.SyntaxError, testSkipValue("[102, 111, 111}"));
}
433
/// Enables diagnostics on `source`, skips one value, and checks the position
/// the diagnostics report afterwards.
///
/// With `expected_error` set, `skipValue` must fail with that error.
/// Otherwise it must succeed and be followed by the end of the document.
/// In both cases the reported line, column and byte offset must match.
fn testDiagnosticsFromSource(expected_error: ?anyerror, line: u64, col: u64, byte_offset: u64, source: anytype) !void {
    const testing = std.testing;

    var diagnostics: Diagnostics = .{};
    source.enableDiagnostics(&diagnostics);

    if (expected_error) |expected_err| {
        const result = source.skipValue();
        try testing.expectError(expected_err, result);
    } else {
        try source.skipValue();
        const trailing = try source.next();
        try testing.expectEqual(Token.end_of_document, trailing);
    }

    try testing.expectEqual(line, diagnostics.getLine());
    try testing.expectEqual(col, diagnostics.getColumn());
    try testing.expectEqual(byte_offset, diagnostics.getByteOffset());
}
/// Runs `testDiagnosticsFromSource` on `s` three times: with a bare Scanner
/// given the complete input, with a Scanner.Reader fed one byte at a time,
/// and with a Scanner.Reader fed through a small multi-byte buffer.
fn testDiagnostics(expected_error: ?anyerror, line: u64, col: u64, byte_offset: u64, s: []const u8) !void {
    var scanner = Scanner.initCompleteInput(std.testing.allocator, s);
    defer scanner.deinit();
    try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &scanner);

    // One-byte buffer, one byte per read: the same construction the tiny
    // reader in "peek all types" uses. A `.fixed` reader would hand over the
    // whole document at once and never exercise positions across refills.
    var tiny_buffer: [1]u8 = undefined;
    var tiny_stream: std.testing.Reader = .init(&tiny_buffer, &.{.{ .buffer = s }});
    tiny_stream.artificial_limit = .limited(1);
    var tiny_json_reader: Scanner.Reader = .init(std.testing.allocator, &tiny_stream.interface);
    defer tiny_json_reader.deinit();
    try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &tiny_json_reader);

    // A five-byte buffer, so refills land at different offsets than above.
    var medium_buffer: [5]u8 = undefined;
    var medium_stream: std.testing.Reader = .init(&medium_buffer, &.{.{ .buffer = s }});
    var medium_json_reader: Scanner.Reader = .init(std.testing.allocator, &medium_stream.interface);
    defer medium_json_reader.deinit();
    try testDiagnosticsFromSource(expected_error, line, col, byte_offset, &medium_json_reader);
}
test "enableDiagnostics" {
    // Arguments are: expected error, line, column, byte offset, document.
    try testDiagnostics(error.UnexpectedEndOfInput, 1, 1, 0, "");
    try testDiagnostics(null, 1, 3, 2, "[]");
    try testDiagnostics(null, 2, 2, 3, "[\n]");
    try testDiagnostics(null, 14, 2, example_document_str.len, example_document_str);

    // Trailing comma: the error is reported at the closing brace.
    // "{\n" (2 bytes) + 2 spaces + the 20-byte member + "\n" puts that brace
    // at byte offset 25, so the two-space indentation is significant.
    try testDiagnostics(error.SyntaxError, 3, 1, 25,
        \\{
        \\  "common": "mistake",
        \\}
    );

    inline for ([_]comptime_int{ 5, 6, 7, 99 }) |reps| {
        // The error happens 1 byte before the end.
        const s = "[" ** reps ++ "}";
        try testDiagnostics(error.SyntaxError, 1, s.len, s.len - 1, s);
    }
}
481
test isNumberFormattedLikeAnInteger {
    // Plain digit runs are integer-formatted.
    const integer_like = [_][]const u8{ "0", "1", "123" };
    for (integer_like) |text| {
        try std.testing.expect(isNumberFormattedLikeAnInteger(text));
    }

    // Negative zero, fractions and exponents are not.
    const not_integer_like = [_][]const u8{ "-0", "0.0", "1.0", "1.23", "1e10", "1E10" };
    for (not_integer_like) |text| {
        try std.testing.expect(!isNumberFormattedLikeAnInteger(text));
    }
}
493
test "fuzz" {
    // Hand `fuzzTestOne` to the test runner's fuzzer with default options.
    return std.testing.fuzz({}, fuzzTestOne, .{});
}
497
/// Fuzz target: scans `input` as a complete document and checks that the
/// scanner never yields more tokens than there are input bytes.
///
/// Any scanner error ends the run successfully; only the token-count property
/// is checked. Returns `error.Overflow` when the property is violated.
fn fuzzTestOne(_: void, input: []const u8) !void {
    // The scanner allocates from this fixed stack buffer.
    var buf: [16384]u8 = undefined;
    var fba: std.heap.FixedBufferAllocator = .init(&buf);

    var scanner = Scanner.initCompleteInput(fba.allocator(), input);
    // Pair init with deinit, as every other test in this file does.
    defer scanner.deinit();

    // Property: There are at most input.len tokens
    var tokens: usize = 0;
    while ((scanner.next() catch return) != .end_of_document) {
        tokens += 1;
        if (tokens > input.len) return error.Overflow;
    }
}