Commit 6b39167fdc

LemonBoy <thatlemon@gmail.com>
2020-09-21 21:34:51
std: Rewrite the fmt parser
Turn the FSM parser into a linear one so that's easier to implement new features and/or more error checking without adding more and more states. Functionally-speaking the two parsers are at feature parity.
1 parent 73be594
Changed files (1)
lib
lib/std/fmt.zig
@@ -27,18 +27,6 @@ pub const FormatOptions = struct {
     fill: u8 = ' ',
 };
 
-fn peekIsAlign(comptime fmt: []const u8) bool {
-    // Should only be called during a state transition to the format segment.
-    comptime assert(fmt[0] == ':');
-
-    inline for (([_]u8{ 1, 2 })[0..]) |i| {
-        if (fmt.len > i and (fmt[i] == '<' or fmt[i] == '^' or fmt[i] == '>')) {
-            return true;
-        }
-    }
-    return false;
-}
-
 /// Renders fmt string with args, calling output with slices of bytes.
 /// If `output` returns an error, the error is returned from `format` and
 /// `output` is not called again.
@@ -103,22 +91,6 @@ pub fn format(
         @compileError("32 arguments max are supported per format call");
     }
 
-    const State = enum {
-        Start,
-        Positional,
-        CloseBrace,
-        Specifier,
-        FormatFillAndAlign,
-        FormatWidth,
-        FormatPrecision,
-    };
-
-    comptime var start_index = 0;
-    comptime var state = State.Start;
-    comptime var maybe_pos_arg: ?comptime_int = null;
-    comptime var specifier_start = 0;
-    comptime var specifier_end = 0;
-    comptime var options = FormatOptions{};
     comptime var arg_state: struct {
         next_arg: usize = 0,
         used_args: ArgSetType = 0,
@@ -128,7 +100,7 @@ pub fn format(
             return (@popCount(ArgSetType, self.used_args) != self.args_len);
         }
 
-        fn nextArg(comptime self: *@This(), comptime pos_arg: ?comptime_int) comptime_int {
+        fn nextArg(comptime self: *@This(), comptime pos_arg: ?usize) comptime_int {
             const next_idx = pos_arg orelse blk: {
                 const arg = self.next_arg;
                 self.next_arg += 1;
@@ -146,182 +118,193 @@ pub fn format(
         }
     } = .{};
 
-    inline for (fmt) |c, i| {
-        switch (state) {
-            .Start => switch (c) {
-                '{' => {
-                    if (start_index < i) {
-                        try writer.writeAll(fmt[start_index..i]);
-                    }
+    comptime var parser: struct {
+        buf: []const u8 = undefined,
+        pos: usize = 0,
+
+        // Returns a decimal number or null if the current character is not a
+        // digit
+        fn number(comptime self: *@This()) ?usize {
+            var r: ?usize = null;
+
+            while (self.pos < self.buf.len) : (self.pos += 1) {
+                switch (self.buf[self.pos]) {
+                    '0'...'9' => {
+                        if (r == null) r = 0;
+                        r.? *= 10;
+                        r.? += self.buf[self.pos] - '0';
+                    },
+                    else => break,
+                }
+            }
 
-                    start_index = i;
-                    specifier_start = i + 1;
-                    specifier_end = i + 1;
-                    maybe_pos_arg = null;
-                    state = .Positional;
-                    options = FormatOptions{};
-                },
-                '}' => {
-                    if (start_index < i) {
-                        try writer.writeAll(fmt[start_index..i]);
-                    }
-                    state = .CloseBrace;
-                },
-                else => {},
-            },
-            .Positional => switch (c) {
-                '{' => {
-                    state = .Start;
-                    start_index = i;
-                },
-                ':' => {
-                    state = if (comptime peekIsAlign(fmt[i..])) State.FormatFillAndAlign else State.FormatWidth;
-                    specifier_end = i;
-                },
-                '0'...'9' => {
-                    if (maybe_pos_arg == null) {
-                        maybe_pos_arg = 0;
-                    }
+            return r;
+        }
 
-                    maybe_pos_arg.? *= 10;
-                    maybe_pos_arg.? += c - '0';
-                    specifier_start = i + 1;
+        // Returns a substring of the input starting from the current position
+        // and ending where `ch` is found or until the end if not found
+        fn until(comptime self: *@This(), comptime ch: u8) []const u8 {
+            const start = self.pos;
 
-                    if (maybe_pos_arg.? >= args.len) {
-                        @compileError("Positional value refers to non-existent argument");
-                    }
-                },
-                '}' => {
-                    const arg_to_print = comptime arg_state.nextArg(maybe_pos_arg);
-
-                    try formatType(
-                        args[arg_to_print],
-                        fmt[0..0],
-                        options,
-                        writer,
-                        default_max_depth,
-                    );
-
-                    state = .Start;
-                    start_index = i + 1;
-                },
-                else => {
-                    state = .Specifier;
-                    specifier_start = i;
-                },
-            },
-            .CloseBrace => switch (c) {
-                '}' => {
-                    state = .Start;
-                    start_index = i;
-                },
-                else => @compileError("Single '}' encountered in format string"),
-            },
-            .Specifier => switch (c) {
-                ':' => {
-                    specifier_end = i;
-                    state = if (comptime peekIsAlign(fmt[i..])) State.FormatFillAndAlign else State.FormatWidth;
-                },
-                '}' => {
-                    const arg_to_print = comptime arg_state.nextArg(maybe_pos_arg);
-
-                    try formatType(
-                        args[arg_to_print],
-                        fmt[specifier_start..i],
-                        options,
-                        writer,
-                        default_max_depth,
-                    );
-                    state = .Start;
-                    start_index = i + 1;
-                },
+            if (start >= self.buf.len)
+                return &[_]u8{};
+
+            while (self.pos < self.buf.len) : (self.pos += 1) {
+                if (self.buf[self.pos] == ch) break;
+            }
+            return self.buf[start..self.pos];
+        }
+
+        // Returns one character, if available
+        fn char(comptime self: *@This()) ?u8 {
+            if (self.pos < self.buf.len) {
+                const ch = self.buf[self.pos];
+                self.pos += 1;
+                return ch;
+            }
+            return null;
+        }
+
+        // Returns the n-th next character or null if that's past the end
+        fn peek(comptime self: *@This(), comptime n: usize) ?u8 {
+            return if (self.pos + n < self.buf.len) self.buf[self.pos + n] else null;
+        }
+    } = .{};
+
+    comptime var options: FormatOptions = .{};
+
+    @setEvalBranchQuota(2000000);
+
+    comptime var i = 0;
+    inline while (i < fmt.len) {
+        comptime const start_index = i;
+
+        inline while (i < fmt.len) : (i += 1) {
+            switch (fmt[i]) {
+                '{', '}' => break,
                 else => {},
-            },
-            // Only entered if the format string contains a fill/align segment.
-            .FormatFillAndAlign => switch (c) {
+            }
+        }
+
+        comptime var end_index = i;
+        comptime var unescape_brace = false;
+
+        // Handle {{ and }}, those are un-escaped as single braces
+        if (i + 1 < fmt.len and fmt[i + 1] == fmt[i]) {
+            unescape_brace = true;
+            // Make the first brace part of the literal...
+            end_index += 1;
+            // ...and skip both
+            i += 2;
+        }
+
+        // Write out the literal
+        if (start_index != end_index) {
+            try writer.writeAll(fmt[start_index..end_index]);
+        }
+
+        // We've already skipped the other brace, restart the loop
+        if (unescape_brace) continue;
+
+        if (i >= fmt.len) break;
+
+        if (fmt[i] == '}') {
+            @compileError("missing opening {");
+        }
+
+        // Get past the {
+        comptime assert(fmt[i] == '{');
+        i += 1;
+
+        comptime const fmt_begin = i;
+        // Find the closing brace
+        inline while (i < fmt.len and fmt[i] != '}') : (i += 1) {}
+        comptime const fmt_end = i;
+
+        if (i >= fmt.len) {
+            @compileError("missing closing }");
+        }
+
+        // Get past the }
+        comptime assert(fmt[i] == '}');
+        i += 1;
+
+        options = .{};
+
+        // Parse the format fragment between braces
+        parser.buf = fmt[fmt_begin..fmt_end];
+        parser.pos = 0;
+
+        // Parse the positional argument number
+        comptime var opt_pos_arg = comptime parser.number();
+
+        // Parse the format specifier
+        comptime var specifier_arg = comptime parser.until(':');
+
+        // Skip the colon, if present
+        if (comptime parser.char()) |ch| {
+            if (ch != ':')
+                @compileError("expected : or }, found '" ++ [1]u8{ch} ++ "'");
+        }
+
+        // Parse the fill character
+        // The fill parameter requires the alignment parameter to be specified
+        // too
+        if (comptime parser.peek(1)) |ch| {
+            if (comptime mem.indexOfScalar(u8, "<^>", ch) != null) {
+                options.fill = comptime parser.char().?;
+            }
+        }
+
+        // Parse the alignment parameter
+        if (comptime parser.peek(0)) |ch| {
+            switch (ch) {
                 '<' => {
-                    options.alignment = Alignment.Left;
-                    state = .FormatWidth;
+                    options.alignment = .Left;
+                    _ = comptime parser.char();
                 },
                 '^' => {
-                    options.alignment = Alignment.Center;
-                    state = .FormatWidth;
+                    options.alignment = .Center;
+                    _ = comptime parser.char();
                 },
                 '>' => {
-                    options.alignment = Alignment.Right;
-                    state = .FormatWidth;
+                    options.alignment = .Right;
+                    _ = comptime parser.char();
                 },
-                else => {
-                    options.fill = c;
-                },
-            },
-            .FormatWidth => switch (c) {
-                '0'...'9' => {
-                    if (options.width == null) {
-                        options.width = 0;
-                    }
+                else => {},
+            }
+        }
 
-                    options.width.? *= 10;
-                    options.width.? += c - '0';
-                },
-                '.' => {
-                    state = .FormatPrecision;
-                },
-                '}' => {
-                    const arg_to_print = comptime arg_state.nextArg(maybe_pos_arg);
-
-                    try formatType(
-                        args[arg_to_print],
-                        fmt[specifier_start..specifier_end],
-                        options,
-                        writer,
-                        default_max_depth,
-                    );
-                    state = .Start;
-                    start_index = i + 1;
-                },
-                else => {
-                    @compileError("Unexpected character in width value: " ++ [_]u8{c});
-                },
-            },
-            .FormatPrecision => switch (c) {
-                '0'...'9' => {
-                    if (options.precision == null) {
-                        options.precision = 0;
-                    }
+        // Parse the width parameter
+        comptime var opt_width_arg = comptime parser.number();
+        options.width = opt_width_arg;
 
-                    options.precision.? *= 10;
-                    options.precision.? += c - '0';
-                },
-                '}' => {
-                    const arg_to_print = comptime arg_state.nextArg(maybe_pos_arg);
-
-                    try formatType(
-                        args[arg_to_print],
-                        fmt[specifier_start..specifier_end],
-                        options,
-                        writer,
-                        default_max_depth,
-                    );
-                    state = .Start;
-                    start_index = i + 1;
-                },
-                else => {
-                    @compileError("Unexpected character in precision value: " ++ [_]u8{c});
-                },
-            },
-        }
-    }
-    comptime {
-        if (comptime arg_state.hasUnusedArgs()) {
-            @compileError("Unused arguments");
+        // Skip the dot, if present
+        if (comptime parser.char()) |ch| {
+            if (ch != '.')
+                @compileError("expected . or }, found '" ++ [1]u8{ch} ++ "'");
         }
-        if (state != State.Start) {
-            @compileError("Incomplete format string: " ++ fmt);
+
+        // Parse the precision parameter
+        comptime var opt_precision_arg = comptime parser.number();
+        options.precision = opt_precision_arg;
+
+        if (comptime parser.char()) |ch| {
+            @compileError("extraneous trailing character '" ++ [1]u8{ch} ++ "'");
         }
+
+        const arg_to_print = comptime arg_state.nextArg(opt_pos_arg);
+        try formatType(
+            args[arg_to_print],
+            specifier_arg,
+            options,
+            writer,
+            default_max_depth,
+        );
     }
-    if (start_index < fmt.len) {
-        try writer.writeAll(fmt[start_index..]);
+
+    if (comptime arg_state.hasUnusedArgs()) {
+        @compileError("Unused arguments");
     }
 }
 
@@ -1371,6 +1354,11 @@ test "parse unsigned comptime" {
     }
 }
 
+test "escaped braces" {
+    try testFmt("escaped: {{foo}}\n", "escaped: {{{{foo}}}}\n", .{});
+    try testFmt("escaped: {foo}\n", "escaped: {{foo}}\n", .{});
+}
+
 test "optional" {
     {
         const value: ?i32 = 1234;