Commit 4d9c4ab82c

Carl Åstholm <carl@astholm.se>
2023-12-18 23:08:27
More accurate argv-to-command-line serialization when spawning child processes on Windows
The old implementation had a bug in it in that it didn't quote empty strings, but it also didn't properly follow the special quoting rules required for the first argument (the executable name). This new implementation serializes the argv correctly such that it can be parsed by the `CommandLineToArgvW` algorithm.
1 parent 13f78e2
Changed files (1)
lib/std/child_process.zig
@@ -744,9 +744,6 @@ pub const ChildProcess = struct {
             windowsDestroyPipe(g_hChildStd_ERR_Rd, g_hChildStd_ERR_Wr);
         };
 
-        const cmd_line = try windowsCreateCommandLine(self.allocator, self.argv);
-        defer self.allocator.free(cmd_line);
-
         var siStartInfo = windows.STARTUPINFOW{
             .cb = @sizeOf(windows.STARTUPINFOW),
             .hStdError = g_hChildStd_ERR_Wr,
@@ -818,7 +815,11 @@ pub const ChildProcess = struct {
         const app_name_w = try unicode.utf8ToUtf16LeWithNull(self.allocator, app_basename_utf8);
         defer self.allocator.free(app_name_w);
 
-        const cmd_line_w = try unicode.utf8ToUtf16LeWithNull(self.allocator, cmd_line);
+        const cmd_line_w = argvToCommandLineWindows(self.allocator, self.argv) catch |err| switch (err) {
+            // argv[0] contains unsupported characters that will never resolve to a valid exe.
+            error.InvalidArg0 => return error.FileNotFound,
+            else => |e| return e,
+        };
         defer self.allocator.free(cmd_line_w);
 
         run: {
@@ -1236,39 +1237,159 @@ test "windowsCreateProcessSupportsExtension" {
     try std.testing.expect(windowsCreateProcessSupportsExtension(&[_]u16{ '.', 'e', 'X', 'e', 'c' }) == null);
 }
 
-/// Caller must dealloc.
-fn windowsCreateCommandLine(allocator: mem.Allocator, argv: []const []const u8) ![:0]u8 {
+pub const ArgvToCommandLineError = error{ OutOfMemory, InvalidUtf8, InvalidArg0 };
+
+/// Serializes `argv` to a Windows command-line string suitable for passing to a child process and
+/// parsing by the `CommandLineToArgvW` algorithm. The caller owns the returned slice.
+pub fn argvToCommandLineWindows(
+    allocator: mem.Allocator,
+    argv: []const []const u8,
+) ArgvToCommandLineError![:0]u16 {
     var buf = std.ArrayList(u8).init(allocator);
     defer buf.deinit();
 
-    for (argv, 0..) |arg, arg_i| {
-        if (arg_i != 0) try buf.append(' ');
-        if (mem.indexOfAny(u8, arg, " \t\n\"") == null) {
-            try buf.appendSlice(arg);
-            continue;
+    if (argv.len != 0) {
+        const arg0 = argv[0];
+
+        // The first argument must be quoted if it contains spaces or ASCII control characters
+        // (excluding DEL). It also follows special quoting rules where backslashes have no special
+        // interpretation, which makes it impossible to pass certain first arguments containing
+        // double quotes to a child process without characters from the first argument leaking into
+        // subsequent ones (which could have security implications).
+        //
+        // Empty arguments technically don't need quotes, but we quote them anyway for maximum
+        // compatibility with different implementations of the 'CommandLineToArgvW' algorithm.
+        //
+        // Double quotes are illegal in paths on Windows, so for the sake of simplicity we reject
+        // all first arguments containing double quotes, even ones that we could theoretically
+        // serialize in unquoted form.
+        var needs_quotes = arg0.len == 0;
+        for (arg0) |c| {
+            if (c <= ' ') {
+                needs_quotes = true;
+            } else if (c == '"') {
+                return error.InvalidArg0;
+            }
         }
-        try buf.append('"');
-        var backslash_count: usize = 0;
-        for (arg) |byte| {
-            switch (byte) {
-                '\\' => backslash_count += 1,
-                '"' => {
-                    try buf.appendNTimes('\\', backslash_count * 2 + 1);
-                    try buf.append('"');
-                    backslash_count = 0;
-                },
-                else => {
-                    try buf.appendNTimes('\\', backslash_count);
-                    try buf.append(byte);
-                    backslash_count = 0;
-                },
+        if (needs_quotes) {
+            try buf.append('"');
+            try buf.appendSlice(arg0);
+            try buf.append('"');
+        } else {
+            try buf.appendSlice(arg0);
+        }
+
+        for (argv[1..]) |arg| {
+            try buf.append(' ');
+
+            // Subsequent arguments must be quoted if they contain spaces, tabs or double quotes,
+            // or if they are empty. For simplicity and for maximum compatibility with different
+            // implementations of the 'CommandLineToArgvW' algorithm, we also quote all ASCII
+            // control characters (again, excluding DEL).
+            needs_quotes = for (arg) |c| {
+                if (c <= ' ' or c == '"') {
+                    break true;
+                }
+            } else arg.len == 0;
+            if (!needs_quotes) {
+                try buf.appendSlice(arg);
+                continue;
+            }
+
+            try buf.append('"');
+            var backslash_count: usize = 0;
+            for (arg) |byte| {
+                switch (byte) {
+                    '\\' => {
+                        backslash_count += 1;
+                    },
+                    '"' => {
+                        try buf.appendNTimes('\\', backslash_count * 2 + 1);
+                        try buf.append('"');
+                        backslash_count = 0;
+                    },
+                    else => {
+                        try buf.appendNTimes('\\', backslash_count);
+                        try buf.append(byte);
+                        backslash_count = 0;
+                    },
+                }
             }
+            try buf.appendNTimes('\\', backslash_count * 2);
+            try buf.append('"');
         }
-        try buf.appendNTimes('\\', backslash_count * 2);
-        try buf.append('"');
     }
 
-    return buf.toOwnedSliceSentinel(0);
+    return try unicode.utf8ToUtf16LeWithNull(allocator, buf.items);
+}
+
+test "argvToCommandLineWindows" {
+    const t = testArgvToCommandLineWindows;
+
+    try t(&.{
+        \\C:\Program Files\zig\zig.exe
+        ,
+        \\run
+        ,
+        \\.\src\main.zig
+        ,
+        \\-target
+        ,
+        \\x86_64-windows-gnu
+        ,
+        \\-O
+        ,
+        \\ReleaseSafe
+        ,
+        \\--
+        ,
+        \\--emoji=🗿
+        ,
+        \\--eval=new Regex("Dwayne \"The Rock\" Johnson")
+        ,
+    },
+        \\"C:\Program Files\zig\zig.exe" run .\src\main.zig -target x86_64-windows-gnu -O ReleaseSafe -- --emoji=🗿 "--eval=new Regex(\"Dwayne \\\"The Rock\\\" Johnson\")"
+    );
+
+    try t(&.{}, "");
+    try t(&.{""}, "\"\"");
+    try t(&.{" "}, "\" \"");
+    try t(&.{"\t"}, "\"\t\"");
+    try t(&.{"\x07"}, "\"\x07\"");
+    try t(&.{"🦎"}, "🦎");
+
+    try t(
+        &.{ "zig", "aa aa", "bb\tbb", "cc\ncc", "dd\r\ndd", "ee\x7Fee" },
+        "zig \"aa aa\" \"bb\tbb\" \"cc\ncc\" \"dd\r\ndd\" ee\x7Fee",
+    );
+
+    try t(
+        &.{ "\\\\foo bar\\foo bar\\", "\\\\zig zag\\zig zag\\" },
+        "\"\\\\foo bar\\foo bar\\\" \"\\\\zig zag\\zig zag\\\\\"",
+    );
+
+    try std.testing.expectError(
+        error.InvalidArg0,
+        argvToCommandLineWindows(std.testing.allocator, &.{"\"quotes\"quotes\""}),
+    );
+    try std.testing.expectError(
+        error.InvalidArg0,
+        argvToCommandLineWindows(std.testing.allocator, &.{"quotes\"quotes"}),
+    );
+    try std.testing.expectError(
+        error.InvalidArg0,
+        argvToCommandLineWindows(std.testing.allocator, &.{"q u o t e s \" q u o t e s"}),
+    );
+}
+
+fn testArgvToCommandLineWindows(argv: []const []const u8, expected_cmd_line: []const u8) !void {
+    const cmd_line_w = try argvToCommandLineWindows(std.testing.allocator, argv);
+    defer std.testing.allocator.free(cmd_line_w);
+
+    const cmd_line = try unicode.utf16leToUtf8Alloc(std.testing.allocator, cmd_line_w);
+    defer std.testing.allocator.free(cmd_line);
+
+    try std.testing.expectEqualStrings(expected_cmd_line, cmd_line);
 }
 
 fn windowsDestroyPipe(rd: ?windows.HANDLE, wr: ?windows.HANDLE) void {