master
1const std = @import("std");
2const assert = std.debug.assert;
3const math = std.math;
4const mem = std.mem;
5const log = std.log.scoped(.yaml);
6
7const Allocator = mem.Allocator;
8const ArenaAllocator = std.heap.ArenaAllocator;
9
10pub const Tokenizer = @import("Tokenizer.zig");
11pub const parse = @import("parse.zig");
12
13const Node = parse.Node;
14const Tree = parse.Tree;
15const ParseError = parse.ParseError;
16
/// Errors that can be returned while converting a parsed tree into `Value`s
/// (`Value.fromNode`) or while encoding a Zig value into a `Value`
/// (`Value.encode`).
pub const YamlError = error{
    /// The tree contained a node that is not a doc, map, list or scalar value.
    UnexpectedNodeType,
    /// The same key appeared twice in one map.
    DuplicateMapKey,
    OutOfMemory,
    /// A slice element could not be turned into a `Value`.
    CannotEncodeValue,
    /// `Value.encode` was handed a union without a tag type. This member has
    /// to be part of the set because `Value.encode` returns it while being
    /// declared as `YamlError!?Value`.
    UntaggedUnion,
} || ParseError || std.fmt.ParseIntError;

/// A YAML sequence, stored as a slice of values.
pub const List = []Value;
/// A YAML mapping from string keys to values.
pub const Map = std.StringHashMap(Value);
26
/// A dynamically typed YAML value. Instances are built either from a parsed
/// tree (`fromNode`) or from an arbitrary Zig value (`encode`), and can be
/// written back out as YAML text with `stringify`.
pub const Value = union(enum) {
    /// No value at all: an empty document, or a map key without a value.
    empty,
    int: i64,
    float: f64,
    string: []const u8,
    list: List,
    map: Map,

    /// Returns the integer payload, or `error.TypeMismatch` when another
    /// variant is active.
    pub fn asInt(self: Value) !i64 {
        if (self != .int) return error.TypeMismatch;
        return self.int;
    }

    /// Returns the float payload, or `error.TypeMismatch` when another
    /// variant is active. An `.int` value is not converted.
    pub fn asFloat(self: Value) !f64 {
        if (self != .float) return error.TypeMismatch;
        return self.float;
    }

    /// Returns the string payload, or `error.TypeMismatch` when another
    /// variant is active. The slice is not copied.
    pub fn asString(self: Value) ![]const u8 {
        if (self != .string) return error.TypeMismatch;
        return self.string;
    }

    /// Returns the list payload, or `error.TypeMismatch` when another
    /// variant is active. The slice is not copied.
    pub fn asList(self: Value) !List {
        if (self != .list) return error.TypeMismatch;
        return self.list;
    }

    /// Returns the map payload, or `error.TypeMismatch` when another
    /// variant is active.
    pub fn asMap(self: Value) !Map {
        if (self != .map) return error.TypeMismatch;
        return self.map;
    }

    /// Layout state threaded through recursive `stringify` calls.
    const StringifyArgs = struct {
        /// Number of spaces written in front of block list items and map keys.
        indentation: usize = 0,
        /// When true, the first key of a map is written without leading
        /// spaces; set for elements that directly follow a "- " marker.
        should_inline_first_key: bool = false,
    };

    /// Writes this value as YAML text to `writer`.
    ///
    /// * `.empty`, an empty list and an empty map write nothing.
    /// * Scalars are printed with `{}` (int), `{d}` (float) and `{s}` (string).
    /// * A list whose first element is a list or map is written in block
    ///   form, one "- " item per line; any other list is written in flow
    ///   form as "[ a, b ]".
    /// * A map is written as "key: value" lines in the order produced by the
    ///   hash map iterator; values that are maps or block lists start on the
    ///   next line with four more spaces of indentation.
    ///
    /// No trailing newline is written. Errors from `writer` are propagated.
    pub fn stringify(self: Value, writer: anytype, args: StringifyArgs) anyerror!void {
        switch (self) {
            .empty => return,
            .int => |int| return writer.print("{}", .{int}),
            .float => |float| return writer.print("{d}", .{float}),
            .string => |string| return writer.print("{s}", .{string}),
            .list => |list| {
                const len = list.len;
                if (len == 0) return;

                // Only the first element decides between block and flow form.
                const first = list[0];
                if (first.isCompound()) {
                    for (list, 0..) |elem, i| {
                        try writer.writeByteNTimes(' ', args.indentation);
                        try writer.writeAll("- ");
                        try elem.stringify(writer, .{
                            .indentation = args.indentation + 2,
                            .should_inline_first_key = true,
                        });
                        if (i < len - 1) {
                            try writer.writeByte('\n');
                        }
                    }
                    return;
                }

                try writer.writeAll("[ ");
                for (list, 0..) |elem, i| {
                    try elem.stringify(writer, args);
                    if (i < len - 1) {
                        try writer.writeAll(", ");
                    }
                }
                try writer.writeAll(" ]");
            },
            .map => |map| {
                const len = map.count();
                if (len == 0) return;

                var i: usize = 0;
                var it = map.iterator();
                while (it.next()) |entry| {
                    const key = entry.key_ptr.*;
                    const value = entry.value_ptr.*;

                    // The first key directly after "- " is already positioned.
                    if (!args.should_inline_first_key or i != 0) {
                        try writer.writeByteNTimes(' ', args.indentation);
                    }
                    try writer.print("{s}: ", .{key});

                    // Scalars and flow-style lists stay on the key's line.
                    const should_inline = blk: {
                        if (!value.isCompound()) break :blk true;
                        if (value == .list and value.list.len > 0 and !value.list[0].isCompound()) break :blk true;
                        break :blk false;
                    };

                    if (should_inline) {
                        try value.stringify(writer, args);
                    } else {
                        try writer.writeByte('\n');
                        try value.stringify(writer, .{
                            .indentation = args.indentation + 4,
                        });
                    }

                    if (i < len - 1) {
                        try writer.writeByte('\n');
                    }

                    i += 1;
                }
            },
        }
    }

    /// True for the container variants `.list` and `.map`.
    fn isCompound(self: Value) bool {
        return switch (self) {
            .list, .map => true,
            else => false,
        };
    }

    /// Converts `node` (and everything below it) into a `Value`, allocating
    /// from `arena`.
    ///
    /// * Doc nodes unwrap to their inner value, or `.empty` when they have none.
    /// * Map nodes become a `Map`; keys are copied into `arena`, a repeated key
    ///   yields `error.DuplicateMapKey`, and an entry without a value maps to
    ///   `.empty`.
    /// * List nodes become a `List`.
    /// * Scalar nodes are tried as a base-10 `i64`, then as an `f64`, and are
    ///   otherwise stored as a copy of the node's `string_value`.
    ///
    /// Any other node yields `error.UnexpectedNodeType`.
    fn fromNode(arena: Allocator, tree: *const Tree, node: *const Node) YamlError!Value {
        if (node.cast(Node.Doc)) |doc| {
            const inner = doc.value orelse {
                // empty doc
                return Value{ .empty = {} };
            };
            return Value.fromNode(arena, tree, inner);
        } else if (node.cast(Node.Map)) |map| {
            // TODO use ContextAdapted HashMap and do not duplicate keys, intern
            // in a contiguous string buffer.
            var out_map = std.StringHashMap(Value).init(arena);
            try out_map.ensureUnusedCapacity(math.cast(u32, map.values.items.len) orelse return error.Overflow);

            for (map.values.items) |entry| {
                const key = try arena.dupe(u8, tree.getRaw(entry.key, entry.key));
                const gop = out_map.getOrPutAssumeCapacity(key);
                if (gop.found_existing) {
                    return error.DuplicateMapKey;
                }
                const value: Value = if (entry.value) |value_node|
                    try Value.fromNode(arena, tree, value_node)
                else
                    .empty;
                gop.value_ptr.* = value;
            }

            return Value{ .map = out_map };
        } else if (node.cast(Node.List)) |list| {
            var out_list = std.array_list.Managed(Value).init(arena);
            try out_list.ensureUnusedCapacity(list.values.items.len);

            for (list.values.items) |elem| {
                const value = try Value.fromNode(arena, tree, elem);
                out_list.appendAssumeCapacity(value);
            }

            return Value{ .list = try out_list.toOwnedSlice() };
        } else if (node.cast(Node.Value)) |value| {
            // The numeric probes work on the raw source text of the node.
            const raw = tree.getRaw(node.start, node.end);

            try_int: {
                // TODO infer base for int
                const int = std.fmt.parseInt(i64, raw, 10) catch break :try_int;
                return Value{ .int = int };
            }

            try_float: {
                const float = std.fmt.parseFloat(f64, raw) catch break :try_float;
                return Value{ .float = float };
            }

            return Value{ .string = try arena.dupe(u8, value.string_value.items) };
        } else {
            log.debug("Unexpected node type: {}", .{node.tag});
            return error.UnexpectedNodeType;
        }
    }

    /// Converts an arbitrary Zig value into a `Value`, allocating from `arena`.
    ///
    /// Supported inputs: integers (must fit in `i64`, otherwise
    /// `error.Overflow`), floats, tuples (-> list), structs (-> map keyed by
    /// field name), tagged unions (-> the active payload), arrays, pointers to
    /// arrays, slices (`[]u8`-like slices become strings, other slices become
    /// lists), optionals and `null`.
    ///
    /// Returns `null` when `input` is `null` or an optional holding `null`.
    /// Tuple elements and struct fields that encode to `null` are left out,
    /// whereas a slice element that encodes to `null` yields
    /// `error.CannotEncodeValue`. Untagged unions are rejected with
    /// `error.UntaggedUnion`; every other type is a compile error.
    fn encode(arena: Allocator, input: anytype) YamlError!?Value {
        switch (@typeInfo(@TypeOf(input))) {
            .comptime_int,
            .int,
            => return Value{ .int = math.cast(i64, input) orelse return error.Overflow },

            .float => return Value{ .float = math.lossyCast(f64, input) },

            .@"struct" => |info| if (info.is_tuple) {
                var list = std.array_list.Managed(Value).init(arena);
                errdefer list.deinit();
                try list.ensureTotalCapacityPrecise(info.fields.len);

                inline for (info.fields) |field| {
                    if (try encode(arena, @field(input, field.name))) |value| {
                        list.appendAssumeCapacity(value);
                    }
                }

                return Value{ .list = try list.toOwnedSlice() };
            } else {
                var map = Map.init(arena);
                errdefer map.deinit();
                try map.ensureTotalCapacity(info.fields.len);

                inline for (info.fields) |field| {
                    if (try encode(arena, @field(input, field.name))) |value| {
                        const key = try arena.dupe(u8, field.name);
                        map.putAssumeCapacityNoClobber(key, value);
                    }
                }

                return Value{ .map = map };
            },

            .@"union" => |info| if (info.tag_type) |tag_type| {
                // Find the active field and encode its payload.
                inline for (info.fields) |field| {
                    if (@field(tag_type, field.name) == input) {
                        return try encode(arena, @field(input, field.name));
                    }
                } else unreachable;
            } else return error.UntaggedUnion,

            // Arrays are encoded through a pointer so they take the slice path.
            .array => return encode(arena, &input),

            .pointer => |info| switch (info.size) {
                .one => switch (@typeInfo(info.child)) {
                    .array => |child_info| {
                        const Slice = []const child_info.child;
                        return encode(arena, @as(Slice, input));
                    },
                    else => {
                        // `@compileError` does no formatting, so the type name
                        // is concatenated directly onto the message.
                        @compileError("Unhandled type: " ++ @typeName(info.child));
                    },
                },
                .slice => {
                    if (info.child == u8) {
                        return Value{ .string = try arena.dupe(u8, input) };
                    }

                    var list = std.array_list.Managed(Value).init(arena);
                    errdefer list.deinit();
                    try list.ensureTotalCapacityPrecise(input.len);

                    for (input) |elem| {
                        if (try encode(arena, elem)) |value| {
                            list.appendAssumeCapacity(value);
                        } else {
                            log.debug("Could not encode value in a list: {any}", .{elem});
                            return error.CannotEncodeValue;
                        }
                    }

                    return Value{ .list = try list.toOwnedSlice() };
                },
                else => {
                    @compileError("Unhandled type: " ++ @typeName(@TypeOf(input)));
                },
            },

            // TODO we should probably have an option to encode `null` and also
            // allow for some default value too.
            .optional => return if (input) |val| encode(arena, val) else null,

            .null => return null,

            else => {
                @compileError("Unhandled type: " ++ @typeName(@TypeOf(input)));
            },
        }
    }
};
297
/// A loaded YAML source: the parse tree, one `Value` per document, and the
/// arena that owns all of their memory.
pub const Yaml = struct {
    arena: ArenaAllocator,
    tree: ?Tree = null,
    docs: std.array_list.Managed(Value),

    /// Releases everything allocated from the arena, including values
    /// previously returned by `parse`.
    pub fn deinit(self: *Yaml) void {
        self.arena.deinit();
    }

    /// Parses `source` and converts every document in it into a `Value`.
    /// All memory comes from a new arena backed by `allocator`; the caller
    /// releases it with `deinit`. On error the arena is freed before
    /// returning.
    pub fn load(allocator: Allocator, source: []const u8) !Yaml {
        var arena = ArenaAllocator.init(allocator);
        errdefer arena.deinit();

        var tree = Tree.init(arena.allocator());
        try tree.parse(source);

        var docs = std.array_list.Managed(Value).init(arena.allocator());
        try docs.ensureTotalCapacityPrecise(tree.docs.items.len);

        for (tree.docs.items) |node| {
            const value = try Value.fromNode(arena.allocator(), &tree, node);
            docs.appendAssumeCapacity(value);
        }

        // NOTE(review): `tree` and `docs` were initialised with
        // `arena.allocator()` of the local `arena`, which is copied into the
        // returned struct below. Presumably the allocator handles they hold
        // still refer to the local variable -- confirm before growing `docs`
        // or allocating through `tree` after `load` returns; allocate via
        // `self.arena.allocator()` instead.
        return Yaml{
            .arena = arena,
            .tree = tree,
            .docs = docs,
        };
    }

    pub const Error = error{
        Unimplemented,
        TypeMismatch,
        StructFieldMissing,
        ArraySizeMismatch,
        UntaggedUnion,
        UnionTagMissing,
        Overflow,
        OutOfMemory,
    };

    /// Converts the loaded documents into a value of type `T`.
    ///
    /// * No documents: succeeds only for `T == void`, otherwise
    ///   `error.TypeMismatch`.
    /// * Exactly one document: that document is converted to `T`.
    /// * Several documents: `T` must be an array (its length has to equal the
    ///   number of documents, otherwise `error.ArraySizeMismatch`) or a slice
    ///   (allocated from the arena); each document becomes one element.
    ///
    /// Memory referenced by the result is owned by the arena and stays valid
    /// until `deinit`.
    pub fn parse(self: *Yaml, comptime T: type) Error!T {
        if (self.docs.items.len == 0) {
            if (@typeInfo(T) == .void) return {};
            return error.TypeMismatch;
        }

        if (self.docs.items.len == 1) {
            return self.parseValue(T, self.docs.items[0]);
        }

        switch (@typeInfo(T)) {
            .array => |info| {
                // Without this check a longer document list would index past
                // the array and a shorter one would leave elements undefined.
                if (info.len != self.docs.items.len) return error.ArraySizeMismatch;

                var parsed: T = undefined;
                for (self.docs.items, 0..) |doc, i| {
                    parsed[i] = try self.parseValue(info.child, doc);
                }
                return parsed;
            },
            .pointer => |info| {
                switch (info.size) {
                    .slice => {
                        const parsed = try self.arena.allocator().alloc(info.child, self.docs.items.len);
                        for (self.docs.items, parsed) |doc, *slot| {
                            slot.* = try self.parseValue(info.child, doc);
                        }
                        return parsed;
                    },
                    else => return error.TypeMismatch,
                }
            },
            .@"union" => return error.Unimplemented,
            else => return error.TypeMismatch,
        }
    }

    /// Converts a single `value` into `T`, dispatching on the kind of `T`.
    /// Floats also accept integer values; pointers accept a list or a string.
    /// `void` always yields `error.TypeMismatch`; kinds not listed here yield
    /// `error.Unimplemented`.
    fn parseValue(self: *Yaml, comptime T: type, value: Value) Error!T {
        return switch (@typeInfo(T)) {
            .int => math.cast(T, try value.asInt()) orelse return error.Overflow,
            .float => if (value.asFloat()) |float| {
                return math.lossyCast(T, float);
            } else |_| {
                return math.lossyCast(T, try value.asInt());
            },
            .@"struct" => self.parseStruct(T, try value.asMap()),
            .@"union" => self.parseUnion(T, value),
            .array => self.parseArray(T, try value.asList()),
            .pointer => if (value.asList()) |list| {
                return self.parsePointer(T, .{ .list = list });
            } else |_| {
                return self.parsePointer(T, .{ .string = try value.asString() });
            },
            .void => error.TypeMismatch,
            // Optionals can be reached as element, payload or top-level types;
            // a present value is converted to the optional's child type.
            .optional => self.parseOptional(T, value),
            else => error.Unimplemented,
        };
    }

    /// Tries each field of the tagged union `T` in declaration order and
    /// returns the first one `value` converts to. `error.TypeMismatch` from a
    /// field moves on to the next field; any other error is returned. Yields
    /// `error.UnionTagMissing` when no field fits and `error.UntaggedUnion`
    /// when `T` has no tag type.
    fn parseUnion(self: *Yaml, comptime T: type, value: Value) Error!T {
        const union_info = @typeInfo(T).@"union";

        if (union_info.tag_type) |_| {
            inline for (union_info.fields) |field| {
                if (self.parseValue(field.type, value)) |u_value| {
                    return @unionInit(T, field.name, u_value);
                } else |err| {
                    if (@as(@TypeOf(err) || error{TypeMismatch}, err) != error.TypeMismatch) return err;
                }
            }
        } else return error.UntaggedUnion;

        return error.UnionTagMissing;
    }

    /// Converts `value` into the optional type `T`: a missing value becomes
    /// `null`, a present one is converted to the child type of `T`.
    fn parseOptional(self: *Yaml, comptime T: type, value: ?Value) Error!T {
        const unwrapped = value orelse return null;
        const opt_info = @typeInfo(T).optional;
        return @as(T, try self.parseValue(opt_info.child, unwrapped));
    }

    /// Fills a struct of type `T` from `map`. Each field is looked up by its
    /// name and, failing that, by its name with every "_" replaced by "-".
    /// Optional fields that are absent become `null`; any other absent field
    /// yields `error.StructFieldMissing`.
    fn parseStruct(self: *Yaml, comptime T: type, map: Map) Error!T {
        const struct_info = @typeInfo(T).@"struct";
        var parsed: T = undefined;

        inline for (struct_info.fields) |field| {
            const value: ?Value = map.get(field.name) orelse blk: {
                const field_name = try mem.replaceOwned(u8, self.arena.allocator(), field.name, "_", "-");
                break :blk map.get(field_name);
            };

            if (@typeInfo(field.type) == .optional) {
                @field(parsed, field.name) = try self.parseOptional(field.type, value);
                continue;
            }

            const unwrapped = value orelse {
                log.debug("missing struct field: {s}: {s}", .{ field.name, @typeName(field.type) });
                return error.StructFieldMissing;
            };
            @field(parsed, field.name) = try self.parseValue(field.type, unwrapped);
        }

        return parsed;
    }

    /// Converts `value` into the pointer type `T`. Only slices are supported:
    /// a slice of `u8` takes the string payload as is, any other slice is
    /// allocated from the arena and filled element by element from the list
    /// payload. A payload of the wrong kind yields `error.TypeMismatch`.
    fn parsePointer(self: *Yaml, comptime T: type, value: Value) Error!T {
        const ptr_info = @typeInfo(T).pointer;
        const arena = self.arena.allocator();

        switch (ptr_info.size) {
            .slice => {
                if (ptr_info.child == u8) {
                    return value.asString();
                }

                // `value` may carry a string here (scalar source, non-u8
                // slice target); go through the checked accessor instead of
                // reading the `.list` field of a value that is not a list.
                const list = try value.asList();

                const parsed = try arena.alloc(ptr_info.child, list.len);
                for (list, parsed) |elem, *slot| {
                    slot.* = try self.parseValue(ptr_info.child, elem);
                }
                return parsed;
            },
            else => return error.Unimplemented,
        }
    }

    /// Converts `list` into the array type `T`. The list must have exactly as
    /// many elements as the array, otherwise `error.ArraySizeMismatch`.
    fn parseArray(self: *Yaml, comptime T: type, list: List) Error!T {
        const array_info = @typeInfo(T).array;
        if (array_info.len != list.len) return error.ArraySizeMismatch;

        var parsed: T = undefined;
        for (list, 0..) |elem, i| {
            parsed[i] = try self.parseValue(array_info.child, elem);
        }

        return parsed;
    }

    /// Writes every document to `writer`, each introduced by a "---" line
    /// (followed by " !<directive>" when the tree reports one for that
    /// document), and finishes with a "..." line.
    pub fn stringify(self: Yaml, writer: anytype) !void {
        for (self.docs.items, 0..) |doc, i| {
            try writer.writeAll("---");
            // `tree` defaults to null, so a missing tree simply means there
            // are no directives to emit.
            if (self.tree) |tree| {
                if (tree.getDirective(i)) |directive| {
                    try writer.print(" !{s}", .{directive});
                }
            }
            try writer.writeByte('\n');
            try doc.stringify(writer, .{});
            try writer.writeByte('\n');
        }
        try writer.writeAll("...\n");
    }
};
489
/// Encodes `input` into a `Value` and writes its YAML text to `writer`.
/// A temporary arena backed by `allocator` holds the intermediate value and
/// is released before returning. Nothing is written when `input` encodes to
/// no value.
pub fn stringify(allocator: Allocator, input: anytype, writer: anytype) !void {
    var scratch = ArenaAllocator.init(allocator);
    defer scratch.deinit();

    const encoded = (try Value.encode(scratch.allocator(), input)) orelse return;

    // TODO should we output as an explicit doc?
    // How can allow the user to specify?
    try encoded.stringify(writer, .{});
}
502
test {
    // Reference the dedicated test file and every declaration of the
    // tokenizer and parser modules so the semantic analyzer sees them.
    const testing = std.testing;
    _ = @import("yaml/test.zig");
    testing.refAllDecls(parse);
    testing.refAllDecls(Tokenizer);
}
507}