master
  1const std = @import("std");
  2const assert = std.debug.assert;
  3const math = std.math;
  4const mem = std.mem;
  5const log = std.log.scoped(.yaml);
  6
  7const Allocator = mem.Allocator;
  8const ArenaAllocator = std.heap.ArenaAllocator;
  9
 10pub const Tokenizer = @import("Tokenizer.zig");
 11pub const parse = @import("parse.zig");
 12
 13const Node = parse.Node;
 14const Tree = parse.Tree;
 15const ParseError = parse.ParseError;
 16
 17pub const YamlError = error{
 18    UnexpectedNodeType,
 19    DuplicateMapKey,
 20    OutOfMemory,
 21    CannotEncodeValue,
 22} || ParseError || std.fmt.ParseIntError;
 23
 24pub const List = []Value;
 25pub const Map = std.StringHashMap(Value);
 26
 27pub const Value = union(enum) {
 28    empty,
 29    int: i64,
 30    float: f64,
 31    string: []const u8,
 32    list: List,
 33    map: Map,
 34
 35    pub fn asInt(self: Value) !i64 {
 36        if (self != .int) return error.TypeMismatch;
 37        return self.int;
 38    }
 39
 40    pub fn asFloat(self: Value) !f64 {
 41        if (self != .float) return error.TypeMismatch;
 42        return self.float;
 43    }
 44
 45    pub fn asString(self: Value) ![]const u8 {
 46        if (self != .string) return error.TypeMismatch;
 47        return self.string;
 48    }
 49
 50    pub fn asList(self: Value) !List {
 51        if (self != .list) return error.TypeMismatch;
 52        return self.list;
 53    }
 54
 55    pub fn asMap(self: Value) !Map {
 56        if (self != .map) return error.TypeMismatch;
 57        return self.map;
 58    }
 59
 60    const StringifyArgs = struct {
 61        indentation: usize = 0,
 62        should_inline_first_key: bool = false,
 63    };
 64
 65    pub fn stringify(self: Value, writer: anytype, args: StringifyArgs) anyerror!void {
 66        switch (self) {
 67            .empty => return,
 68            .int => |int| return writer.print("{}", .{int}),
 69            .float => |float| return writer.print("{d}", .{float}),
 70            .string => |string| return writer.print("{s}", .{string}),
 71            .list => |list| {
 72                const len = list.len;
 73                if (len == 0) return;
 74
 75                const first = list[0];
 76                if (first.isCompound()) {
 77                    for (list, 0..) |elem, i| {
 78                        try writer.writeByteNTimes(' ', args.indentation);
 79                        try writer.writeAll("- ");
 80                        try elem.stringify(writer, .{
 81                            .indentation = args.indentation + 2,
 82                            .should_inline_first_key = true,
 83                        });
 84                        if (i < len - 1) {
 85                            try writer.writeByte('\n');
 86                        }
 87                    }
 88                    return;
 89                }
 90
 91                try writer.writeAll("[ ");
 92                for (list, 0..) |elem, i| {
 93                    try elem.stringify(writer, args);
 94                    if (i < len - 1) {
 95                        try writer.writeAll(", ");
 96                    }
 97                }
 98                try writer.writeAll(" ]");
 99            },
100            .map => |map| {
101                const len = map.count();
102                if (len == 0) return;
103
104                var i: usize = 0;
105                var it = map.iterator();
106                while (it.next()) |entry| {
107                    const key = entry.key_ptr.*;
108                    const value = entry.value_ptr.*;
109
110                    if (!args.should_inline_first_key or i != 0) {
111                        try writer.writeByteNTimes(' ', args.indentation);
112                    }
113                    try writer.print("{s}: ", .{key});
114
115                    const should_inline = blk: {
116                        if (!value.isCompound()) break :blk true;
117                        if (value == .list and value.list.len > 0 and !value.list[0].isCompound()) break :blk true;
118                        break :blk false;
119                    };
120
121                    if (should_inline) {
122                        try value.stringify(writer, args);
123                    } else {
124                        try writer.writeByte('\n');
125                        try value.stringify(writer, .{
126                            .indentation = args.indentation + 4,
127                        });
128                    }
129
130                    if (i < len - 1) {
131                        try writer.writeByte('\n');
132                    }
133
134                    i += 1;
135                }
136            },
137        }
138    }
139
140    fn isCompound(self: Value) bool {
141        return switch (self) {
142            .list, .map => true,
143            else => false,
144        };
145    }
146
147    fn fromNode(arena: Allocator, tree: *const Tree, node: *const Node) YamlError!Value {
148        if (node.cast(Node.Doc)) |doc| {
149            const inner = doc.value orelse {
150                // empty doc
151                return Value{ .empty = {} };
152            };
153            return Value.fromNode(arena, tree, inner);
154        } else if (node.cast(Node.Map)) |map| {
155            // TODO use ContextAdapted HashMap and do not duplicate keys, intern
156            // in a contiguous string buffer.
157            var out_map = std.StringHashMap(Value).init(arena);
158            try out_map.ensureUnusedCapacity(math.cast(u32, map.values.items.len) orelse return error.Overflow);
159
160            for (map.values.items) |entry| {
161                const key = try arena.dupe(u8, tree.getRaw(entry.key, entry.key));
162                const gop = out_map.getOrPutAssumeCapacity(key);
163                if (gop.found_existing) {
164                    return error.DuplicateMapKey;
165                }
166                const value = if (entry.value) |value|
167                    try Value.fromNode(arena, tree, value)
168                else
169                    .empty;
170                gop.value_ptr.* = value;
171            }
172
173            return Value{ .map = out_map };
174        } else if (node.cast(Node.List)) |list| {
175            var out_list = std.array_list.Managed(Value).init(arena);
176            try out_list.ensureUnusedCapacity(list.values.items.len);
177
178            for (list.values.items) |elem| {
179                const value = try Value.fromNode(arena, tree, elem);
180                out_list.appendAssumeCapacity(value);
181            }
182
183            return Value{ .list = try out_list.toOwnedSlice() };
184        } else if (node.cast(Node.Value)) |value| {
185            const raw = tree.getRaw(node.start, node.end);
186
187            try_int: {
188                // TODO infer base for int
189                const int = std.fmt.parseInt(i64, raw, 10) catch break :try_int;
190                return Value{ .int = int };
191            }
192
193            try_float: {
194                const float = std.fmt.parseFloat(f64, raw) catch break :try_float;
195                return Value{ .float = float };
196            }
197
198            return Value{ .string = try arena.dupe(u8, value.string_value.items) };
199        } else {
200            log.debug("Unexpected node type: {}", .{node.tag});
201            return error.UnexpectedNodeType;
202        }
203    }
204
205    fn encode(arena: Allocator, input: anytype) YamlError!?Value {
206        switch (@typeInfo(@TypeOf(input))) {
207            .comptime_int,
208            .int,
209            => return Value{ .int = math.cast(i64, input) orelse return error.Overflow },
210
211            .float => return Value{ .float = math.lossyCast(f64, input) },
212
213            .@"struct" => |info| if (info.is_tuple) {
214                var list = std.array_list.Managed(Value).init(arena);
215                errdefer list.deinit();
216                try list.ensureTotalCapacityPrecise(info.fields.len);
217
218                inline for (info.fields) |field| {
219                    if (try encode(arena, @field(input, field.name))) |value| {
220                        list.appendAssumeCapacity(value);
221                    }
222                }
223
224                return Value{ .list = try list.toOwnedSlice() };
225            } else {
226                var map = Map.init(arena);
227                errdefer map.deinit();
228                try map.ensureTotalCapacity(info.fields.len);
229
230                inline for (info.fields) |field| {
231                    if (try encode(arena, @field(input, field.name))) |value| {
232                        const key = try arena.dupe(u8, field.name);
233                        map.putAssumeCapacityNoClobber(key, value);
234                    }
235                }
236
237                return Value{ .map = map };
238            },
239
240            .@"union" => |info| if (info.tag_type) |tag_type| {
241                inline for (info.fields) |field| {
242                    if (@field(tag_type, field.name) == input) {
243                        return try encode(arena, @field(input, field.name));
244                    }
245                } else unreachable;
246            } else return error.UntaggedUnion,
247
248            .array => return encode(arena, &input),
249
250            .pointer => |info| switch (info.size) {
251                .one => switch (@typeInfo(info.child)) {
252                    .array => |child_info| {
253                        const Slice = []const child_info.child;
254                        return encode(arena, @as(Slice, input));
255                    },
256                    else => {
257                        @compileError("Unhandled type: {s}" ++ @typeName(info.child));
258                    },
259                },
260                .slice => {
261                    if (info.child == u8) {
262                        return Value{ .string = try arena.dupe(u8, input) };
263                    }
264
265                    var list = std.array_list.Managed(Value).init(arena);
266                    errdefer list.deinit();
267                    try list.ensureTotalCapacityPrecise(input.len);
268
269                    for (input) |elem| {
270                        if (try encode(arena, elem)) |value| {
271                            list.appendAssumeCapacity(value);
272                        } else {
273                            log.debug("Could not encode value in a list: {any}", .{elem});
274                            return error.CannotEncodeValue;
275                        }
276                    }
277
278                    return Value{ .list = try list.toOwnedSlice() };
279                },
280                else => {
281                    @compileError("Unhandled type: {s}" ++ @typeName(@TypeOf(input)));
282                },
283            },
284
285            // TODO we should probably have an option to encode `null` and also
286            // allow for some default value too.
287            .optional => return if (input) |val| encode(arena, val) else null,
288
289            .null => return null,
290
291            else => {
292                @compileError("Unhandled type: {s}" ++ @typeName(@TypeOf(input)));
293            },
294        }
295    }
296};
297
298pub const Yaml = struct {
299    arena: ArenaAllocator,
300    tree: ?Tree = null,
301    docs: std.array_list.Managed(Value),
302
303    pub fn deinit(self: *Yaml) void {
304        self.arena.deinit();
305    }
306
307    pub fn load(allocator: Allocator, source: []const u8) !Yaml {
308        var arena = ArenaAllocator.init(allocator);
309        errdefer arena.deinit();
310
311        var tree = Tree.init(arena.allocator());
312        try tree.parse(source);
313
314        var docs = std.array_list.Managed(Value).init(arena.allocator());
315        try docs.ensureTotalCapacityPrecise(tree.docs.items.len);
316
317        for (tree.docs.items) |node| {
318            const value = try Value.fromNode(arena.allocator(), &tree, node);
319            docs.appendAssumeCapacity(value);
320        }
321
322        return Yaml{
323            .arena = arena,
324            .tree = tree,
325            .docs = docs,
326        };
327    }
328
329    pub const Error = error{
330        Unimplemented,
331        TypeMismatch,
332        StructFieldMissing,
333        ArraySizeMismatch,
334        UntaggedUnion,
335        UnionTagMissing,
336        Overflow,
337        OutOfMemory,
338    };
339
340    pub fn parse(self: *Yaml, comptime T: type) Error!T {
341        if (self.docs.items.len == 0) {
342            if (@typeInfo(T) == .void) return {};
343            return error.TypeMismatch;
344        }
345
346        if (self.docs.items.len == 1) {
347            return self.parseValue(T, self.docs.items[0]);
348        }
349
350        switch (@typeInfo(T)) {
351            .array => |info| {
352                var parsed: T = undefined;
353                for (self.docs.items, 0..) |doc, i| {
354                    parsed[i] = try self.parseValue(info.child, doc);
355                }
356                return parsed;
357            },
358            .pointer => |info| {
359                switch (info.size) {
360                    .slice => {
361                        var parsed = try self.arena.allocator().alloc(info.child, self.docs.items.len);
362                        for (self.docs.items, 0..) |doc, i| {
363                            parsed[i] = try self.parseValue(info.child, doc);
364                        }
365                        return parsed;
366                    },
367                    else => return error.TypeMismatch,
368                }
369            },
370            .@"union" => return error.Unimplemented,
371            else => return error.TypeMismatch,
372        }
373    }
374
375    fn parseValue(self: *Yaml, comptime T: type, value: Value) Error!T {
376        return switch (@typeInfo(T)) {
377            .int => math.cast(T, try value.asInt()) orelse return error.Overflow,
378            .float => if (value.asFloat()) |float| {
379                return math.lossyCast(T, float);
380            } else |_| {
381                return math.lossyCast(T, try value.asInt());
382            },
383            .@"struct" => self.parseStruct(T, try value.asMap()),
384            .@"union" => self.parseUnion(T, value),
385            .array => self.parseArray(T, try value.asList()),
386            .pointer => if (value.asList()) |list| {
387                return self.parsePointer(T, .{ .list = list });
388            } else |_| {
389                return self.parsePointer(T, .{ .string = try value.asString() });
390            },
391            .void => error.TypeMismatch,
392            .optional => unreachable,
393            else => error.Unimplemented,
394        };
395    }
396
397    fn parseUnion(self: *Yaml, comptime T: type, value: Value) Error!T {
398        const union_info = @typeInfo(T).@"union";
399
400        if (union_info.tag_type) |_| {
401            inline for (union_info.fields) |field| {
402                if (self.parseValue(field.type, value)) |u_value| {
403                    return @unionInit(T, field.name, u_value);
404                } else |err| {
405                    if (@as(@TypeOf(err) || error{TypeMismatch}, err) != error.TypeMismatch) return err;
406                }
407            }
408        } else return error.UntaggedUnion;
409
410        return error.UnionTagMissing;
411    }
412
413    fn parseOptional(self: *Yaml, comptime T: type, value: ?Value) Error!T {
414        const unwrapped = value orelse return null;
415        const opt_info = @typeInfo(T).optional;
416        return @as(T, try self.parseValue(opt_info.child, unwrapped));
417    }
418
419    fn parseStruct(self: *Yaml, comptime T: type, map: Map) Error!T {
420        const struct_info = @typeInfo(T).@"struct";
421        var parsed: T = undefined;
422
423        inline for (struct_info.fields) |field| {
424            const value: ?Value = map.get(field.name) orelse blk: {
425                const field_name = try mem.replaceOwned(u8, self.arena.allocator(), field.name, "_", "-");
426                break :blk map.get(field_name);
427            };
428
429            if (@typeInfo(field.type) == .optional) {
430                @field(parsed, field.name) = try self.parseOptional(field.type, value);
431                continue;
432            }
433
434            const unwrapped = value orelse {
435                log.debug("missing struct field: {s}: {s}", .{ field.name, @typeName(field.type) });
436                return error.StructFieldMissing;
437            };
438            @field(parsed, field.name) = try self.parseValue(field.type, unwrapped);
439        }
440
441        return parsed;
442    }
443
444    fn parsePointer(self: *Yaml, comptime T: type, value: Value) Error!T {
445        const ptr_info = @typeInfo(T).pointer;
446        const arena = self.arena.allocator();
447
448        switch (ptr_info.size) {
449            .slice => {
450                if (ptr_info.child == u8) {
451                    return value.asString();
452                }
453
454                var parsed = try arena.alloc(ptr_info.child, value.list.len);
455                for (value.list, 0..) |elem, i| {
456                    parsed[i] = try self.parseValue(ptr_info.child, elem);
457                }
458                return parsed;
459            },
460            else => return error.Unimplemented,
461        }
462    }
463
464    fn parseArray(self: *Yaml, comptime T: type, list: List) Error!T {
465        const array_info = @typeInfo(T).array;
466        if (array_info.len != list.len) return error.ArraySizeMismatch;
467
468        var parsed: T = undefined;
469        for (list, 0..) |elem, i| {
470            parsed[i] = try self.parseValue(array_info.child, elem);
471        }
472
473        return parsed;
474    }
475
476    pub fn stringify(self: Yaml, writer: anytype) !void {
477        for (self.docs.items, 0..) |doc, i| {
478            try writer.writeAll("---");
479            if (self.tree.?.getDirective(i)) |directive| {
480                try writer.print(" !{s}", .{directive});
481            }
482            try writer.writeByte('\n');
483            try doc.stringify(writer, .{});
484            try writer.writeByte('\n');
485        }
486        try writer.writeAll("...\n");
487    }
488};
489
/// Encodes `input` as a `Value` and writes it to `writer`.
///
/// Intermediate allocations use a temporary arena on top of `allocator` that
/// is freed before returning. Nothing is written when `input` encodes to
/// nothing (for example `null`).
pub fn stringify(allocator: Allocator, input: anytype, writer: anytype) !void {
    var arena = ArenaAllocator.init(allocator);
    defer arena.deinit();

    const value = (try Value.encode(arena.allocator(), input)) orelse return;

    // TODO should we output as an explicit doc?
    // How can allow the user to specify?
    try value.stringify(writer, .{});
}
502
test {
    // Reference the sub-modules and the external test file so that their
    // tests are compiled and run together with this file's.
    _ = @import("yaml/test.zig");
    std.testing.refAllDecls(parse);
    std.testing.refAllDecls(Tokenizer);
}