Commit f4c4c04f1c
Changed files (8)
lib
std
lib/std/process.zig
@@ -625,11 +625,22 @@ pub const ArgIteratorWasi = struct {
};
/// Iterator that implements the Windows command-line parsing algorithm.
+/// The implementation is intended to be compatible with the post-2008 C runtime,
+/// but is *not* intended to be compatible with `CommandLineToArgvW` since
+/// `CommandLineToArgvW` uses the pre-2008 parsing rules.
///
-/// This iterator faithfully implements the parsing behavior observed in `CommandLineToArgvW` with
+/// This iterator faithfully implements the parsing behavior observed from the C runtime with
/// one exception: if the command-line string is empty, the iterator will immediately complete
-/// without returning any arguments (whereas `CommandLineArgvW` will return a single argument
+/// without returning any arguments (whereas the C runtime will return a single argument
/// representing the name of the current executable).
+///
+/// The essential parts of the algorithm are described in Microsoft's documentation:
+///
+/// - https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-170#parsing-c-command-line-arguments
+///
+/// David Deley explains some additional undocumented quirks in great detail:
+///
+/// - https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES
pub const ArgIteratorWindows = struct {
allocator: Allocator,
/// Owned by the iterator.
@@ -686,6 +697,51 @@ pub const ArgIteratorWindows = struct {
fn emitCharacter(self: *ArgIteratorWindows, char: u8) void {
self.buffer[self.end] = char;
self.end += 1;
+
+ // Because we are emitting WTF-8 byte-by-byte, we need to
+ // check to see if we've emitted two consecutive surrogate
+ // codepoints that form a valid surrogate pair in order
+ // to ensure that we're always emitting well-formed WTF-8
+ // (https://simonsapin.github.io/wtf-8/#concatenating).
+ //
+ // If we do have a valid surrogate pair, we need to emit
+ // the UTF-8 sequence for the codepoint that they encode
+ // instead of the WTF-8 encoding for the two surrogate
+ // codepoints separately.
+ //
+ // This is relevant when dealing with a WTF-16 encoded
+ // command line like this:
+ // "<0xD801>"<0xDC37>
+ // which would get converted to WTF-8 in `cmd_line` as:
+ // "<0xED><0xA0><0x81>"<0xED><0xB0><0xB7>
+ // and then after parsing it'd naively get emitted as:
+ // <0xED><0xA0><0x81><0xED><0xB0><0xB7>
+ // but instead, we need to recognize the surrogate pair
+ // and emit the codepoint it encodes, which in this
+ // example is U+10437 (𐐷), which is encoded in UTF-8 as:
+ // <0xF0><0x90><0x90><0xB7>
+ concatSurrogatePair(self);
+ }
+
+ fn concatSurrogatePair(self: *ArgIteratorWindows) void {
+     // A surrogate codepoint always takes exactly 3 bytes, so a pair can
+     // only be present once at least 6 bytes have been emitted for the
+     // current argument.
+     const pair_len = 6;
+     if (self.end - self.start < pair_len) return;
+
+     // Inspect only the most recently emitted 6 bytes.
+     const tail = self.buffer[self.end - pair_len .. self.end];
+     const view = std.unicode.Wtf8View.init(tail) catch return;
+     var codepoints = view.iterator();
+
+     // Bail out unless the tail is exactly <high surrogate><low surrogate>.
+     const high = std.math.cast(u16, codepoints.nextCodepoint().?) orelse return;
+     if (!std.unicode.utf16IsHighSurrogate(high)) return;
+     const low = std.math.cast(u16, codepoints.nextCodepoint().?) orelse return;
+     if (!std.unicode.utf16IsLowSurrogate(low)) return;
+
+     // Re-encode the pair as UTF-8 in place over the 6 surrogate bytes,
+     // then drop the bytes that are no longer needed.
+     const pair_le = [2]u16{
+         std.mem.nativeToLittle(u16, high),
+         std.mem.nativeToLittle(u16, low),
+     };
+     const utf8_len = std.unicode.utf16LeToUtf8(tail, &pair_le) catch unreachable;
+     self.end -= pair_len - utf8_len;
}
fn yieldArg(self: *ArgIteratorWindows) [:0]const u8 {
@@ -711,69 +767,37 @@ pub const ArgIteratorWindows = struct {
}
};
- // The essential parts of the algorithm are described in Microsoft's documentation:
- //
- // - <https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-170#parsing-c-command-line-arguments>
- // - <https://learn.microsoft.com/en-us/windows/win32/api/shellapi/nf-shellapi-commandlinetoargvw>
- //
- // David Deley explains some additional undocumented quirks in great detail:
- //
- // - <https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES>
- //
- // Code points <= U+0020 terminating an unquoted first argument was discovered independently by
- // testing and observing the behavior of 'CommandLineToArgvW' on Windows 10.
-
fn nextWithStrategy(self: *ArgIteratorWindows, comptime strategy: type) strategy.T {
// The first argument (the executable name) uses different parsing rules.
if (self.index == 0) {
- var char = if (self.cmd_line.len != 0) self.cmd_line[0] else 0;
- switch (char) {
- 0 => {
- // Immediately complete the iterator.
- // 'CommandLineToArgvW' would return the name of the current executable here.
- return strategy.eof;
- },
- '"' => {
- // If the first character is a quote, read everything until the next quote (then
- // skip that quote), or until the end of the string.
- self.index += 1;
- while (true) : (self.index += 1) {
- char = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
- switch (char) {
- 0 => {
- return strategy.yieldArg(self);
- },
- '"' => {
- self.index += 1;
- return strategy.yieldArg(self);
- },
- else => {
- strategy.emitCharacter(self, char);
- },
- }
- }
- },
- else => {
- // Otherwise, read everything until the next space or ASCII control character
- // (not including DEL) (then skip that character), or until the end of the
- // string. This means that if the command-line string starts with one of these
- // characters, the first returned argument will be the empty string.
- while (true) : (self.index += 1) {
- char = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
- switch (char) {
- 0 => {
- return strategy.yieldArg(self);
- },
- '\x01'...' ' => {
- self.index += 1;
- return strategy.yieldArg(self);
- },
- else => {
- strategy.emitCharacter(self, char);
- },
+ if (self.cmd_line.len == 0 or self.cmd_line[0] == 0) {
+ // Immediately complete the iterator.
+ // The C runtime would return the name of the current executable here.
+ return strategy.eof;
+ }
+
+ var inside_quotes = false;
+ while (true) : (self.index += 1) {
+ const char = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
+ switch (char) {
+ 0 => {
+ return strategy.yieldArg(self);
+ },
+ '"' => {
+ inside_quotes = !inside_quotes;
+ },
+ ' ', '\t' => {
+ if (inside_quotes)
+ strategy.emitCharacter(self, char)
+ else {
+ self.index += 1;
+ return strategy.yieldArg(self);
}
- }
- },
+ },
+ else => {
+ strategy.emitCharacter(self, char);
+ },
+ }
}
}
@@ -791,9 +815,10 @@ pub const ArgIteratorWindows = struct {
//
// - The end of the string always terminates the current argument.
// - When not in 'inside_quotes' mode, a space or tab terminates the current argument.
- // - 2n backslashes followed by a quote emit n backslashes. If in 'inside_quotes' and the
- // quote is immediately followed by a second quote, one quote is emitted and the other is
- // skipped, otherwise, the quote is skipped. Finally, 'inside_quotes' is toggled.
+ // - 2n backslashes followed by a quote emit n backslashes (note: n can be zero).
+ // If in 'inside_quotes' and the quote is immediately followed by a second quote,
+ // one quote is emitted and the other is skipped, otherwise, the quote is skipped
+ // and 'inside_quotes' is toggled.
// - 2n + 1 backslashes followed by a quote emit n backslashes followed by a quote.
// - n backslashes not followed by a quote emit n backslashes.
var backslash_count: usize = 0;
@@ -826,8 +851,9 @@ pub const ArgIteratorWindows = struct {
{
strategy.emitCharacter(self, '"');
self.index += 1;
+ } else {
+ inside_quotes = !inside_quotes;
}
- inside_quotes = !inside_quotes;
}
},
'\\' => {
@@ -1215,10 +1241,10 @@ test ArgIteratorWindows {
// Separators
try t("aa bb cc", &.{ "aa", "bb", "cc" });
try t("aa\tbb\tcc", &.{ "aa", "bb", "cc" });
- try t("aa\nbb\ncc", &.{ "aa", "bb\ncc" });
- try t("aa\r\nbb\r\ncc", &.{ "aa", "\nbb\r\ncc" });
- try t("aa\rbb\rcc", &.{ "aa", "bb\rcc" });
- try t("aa\x07bb\x07cc", &.{ "aa", "bb\x07cc" });
+ try t("aa\nbb\ncc", &.{"aa\nbb\ncc"});
+ try t("aa\r\nbb\r\ncc", &.{"aa\r\nbb\r\ncc"});
+ try t("aa\rbb\rcc", &.{"aa\rbb\rcc"});
+ try t("aa\x07bb\x07cc", &.{"aa\x07bb\x07cc"});
try t("aa\x7Fbb\x7Fcc", &.{"aa\x7Fbb\x7Fcc"});
try t("aa🦎bb🦎cc", &.{"aa🦎bb🦎cc"});
@@ -1227,22 +1253,22 @@ test ArgIteratorWindows {
try t(" aa bb ", &.{ "", "aa", "bb" });
try t("\t\t", &.{""});
try t("\t\taa\t\tbb\t\t", &.{ "", "aa", "bb" });
- try t("\n\n", &.{ "", "\n" });
- try t("\n\naa\n\nbb\n\n", &.{ "", "\naa\n\nbb\n\n" });
+ try t("\n\n", &.{"\n\n"});
+ try t("\n\naa\n\nbb\n\n", &.{"\n\naa\n\nbb\n\n"});
// Executable name with quotes/backslashes
try t("\"aa bb\tcc\ndd\"", &.{"aa bb\tcc\ndd"});
try t("\"", &.{""});
try t("\"\"", &.{""});
- try t("\"\"\"", &.{ "", "" });
- try t("\"\"\"\"", &.{ "", "" });
- try t("\"\"\"\"\"", &.{ "", "\"" });
- try t("aa\"bb\"cc\"dd", &.{"aa\"bb\"cc\"dd"});
- try t("aa\"bb cc\"dd", &.{ "aa\"bb", "ccdd" });
- try t("\"aa\\\"bb\"", &.{ "aa\\", "bb" });
+ try t("\"\"\"", &.{""});
+ try t("\"\"\"\"", &.{""});
+ try t("\"\"\"\"\"", &.{""});
+ try t("aa\"bb\"cc\"dd", &.{"aabbccdd"});
+ try t("aa\"bb cc\"dd", &.{"aabb ccdd"});
+ try t("\"aa\\\"bb\"", &.{"aa\\bb"});
try t("\"aa\\\\\"", &.{"aa\\\\"});
- try t("aa\\\"bb", &.{"aa\\\"bb"});
- try t("aa\\\\\"bb", &.{"aa\\\\\"bb"});
+ try t("aa\\\"bb", &.{"aa\\bb"});
+ try t("aa\\\\\"bb", &.{"aa\\\\bb"});
// Arguments with quotes/backslashes
try t(". \"aa bb\tcc\ndd\"", &.{ ".", "aa bb\tcc\ndd" });
@@ -1252,29 +1278,66 @@ test ArgIteratorWindows {
try t(". \"\"", &.{ ".", "" });
try t(". \"\"\"", &.{ ".", "\"" });
try t(". \"\"\"\"", &.{ ".", "\"" });
- try t(". \"\"\"\"\"", &.{ ".", "\"" });
+ try t(". \"\"\"\"\"", &.{ ".", "\"\"" });
try t(". \"\"\"\"\"\"", &.{ ".", "\"\"" });
try t(". \" \"", &.{ ".", " " });
try t(". \" \"\"", &.{ ".", " \"" });
try t(". \" \"\"\"", &.{ ".", " \"" });
- try t(". \" \"\"\"\"", &.{ ".", " \"" });
+ try t(". \" \"\"\"\"", &.{ ".", " \"\"" });
try t(". \" \"\"\"\"\"", &.{ ".", " \"\"" });
- try t(". \" \"\"\"\"\"\"", &.{ ".", " \"\"" });
+ try t(". \" \"\"\"\"\"\"", &.{ ".", " \"\"\"" });
try t(". \\\"", &.{ ".", "\"" });
try t(". \\\"\"", &.{ ".", "\"" });
try t(". \\\"\"\"", &.{ ".", "\"" });
try t(". \\\"\"\"\"", &.{ ".", "\"\"" });
try t(". \\\"\"\"\"\"", &.{ ".", "\"\"" });
- try t(". \\\"\"\"\"\"\"", &.{ ".", "\"\"" });
+ try t(". \\\"\"\"\"\"\"", &.{ ".", "\"\"\"" });
try t(". \" \\\"", &.{ ".", " \"" });
try t(". \" \\\"\"", &.{ ".", " \"" });
try t(". \" \\\"\"\"", &.{ ".", " \"\"" });
try t(". \" \\\"\"\"\"", &.{ ".", " \"\"" });
- try t(". \" \\\"\"\"\"\"", &.{ ".", " \"\"" });
+ try t(". \" \\\"\"\"\"\"", &.{ ".", " \"\"\"" });
try t(". \" \\\"\"\"\"\"\"", &.{ ".", " \"\"\"" });
try t(". aa\\bb\\\\cc\\\\\\dd", &.{ ".", "aa\\bb\\\\cc\\\\\\dd" });
try t(". \\\\\\\"aa bb\"", &.{ ".", "\\\"aa", "bb" });
try t(". \\\\\\\\\"aa bb\"", &.{ ".", "\\\\aa bb" });
+
+ // From https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args#results-of-parsing-command-lines
+ try t(
+ \\foo.exe "abc" d e
+ , &.{ "foo.exe", "abc", "d", "e" });
+ try t(
+ \\foo.exe a\\b d"e f"g h
+ , &.{ "foo.exe", "a\\\\b", "de fg", "h" });
+ try t(
+ \\foo.exe a\\\"b c d
+ , &.{ "foo.exe", "a\\\"b", "c", "d" });
+ try t(
+ \\foo.exe a\\\\"b c" d e
+ , &.{ "foo.exe", "a\\\\b c", "d", "e" });
+ try t(
+ \\foo.exe a"b"" c d
+ , &.{ "foo.exe", "ab\" c d" });
+
+ // From https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULESEX
+ try t("foo.exe CallMeIshmael", &.{ "foo.exe", "CallMeIshmael" });
+ try t("foo.exe \"Call Me Ishmael\"", &.{ "foo.exe", "Call Me Ishmael" });
+ try t("foo.exe Cal\"l Me I\"shmael", &.{ "foo.exe", "Call Me Ishmael" });
+ try t("foo.exe CallMe\\\"Ishmael", &.{ "foo.exe", "CallMe\"Ishmael" });
+ try t("foo.exe \"CallMe\\\"Ishmael\"", &.{ "foo.exe", "CallMe\"Ishmael" });
+ try t("foo.exe \"Call Me Ishmael\\\\\"", &.{ "foo.exe", "Call Me Ishmael\\" });
+ try t("foo.exe \"CallMe\\\\\\\"Ishmael\"", &.{ "foo.exe", "CallMe\\\"Ishmael" });
+ try t("foo.exe a\\\\\\b", &.{ "foo.exe", "a\\\\\\b" });
+ try t("foo.exe \"a\\\\\\b\"", &.{ "foo.exe", "a\\\\\\b" });
+
+ // Surrogate pair encoding of 𐐷 separated by quotes.
+ // Encoded as WTF-16:
+ // "<0xD801>"<0xDC37>
+ // Encoded as WTF-8:
+ // "<0xED><0xA0><0x81>"<0xED><0xB0><0xB7>
+ // During parsing, the quotes drop out and the surrogate pair
+ // should end up encoded as its normal UTF-8 representation.
+ try t("foo.exe \"\xed\xa0\x81\"\xed\xb0\xb7", &.{ "foo.exe", "𐐷" });
}
fn testArgIteratorWindows(cmd_line: []const u8, expected_args: []const []const u8) !void {
test/standalone/windows_argv/build.zig
@@ -0,0 +1,88 @@
+const std = @import("std");
+const builtin = @import("builtin");
+
+pub fn build(b: *std.Build) !void {
+    // Register the step first so that `test` exists (as a no-op) on hosts
+    // where nothing below is set up.
+    const test_step = b.step("test", "Test it");
+    b.default_step = test_step;
+
+    if (builtin.os.tag != .windows) return;
+
+    const optimize: std.builtin.OptimizeMode = .Debug;
+    const unicode_flags: []const []const u8 = &.{ "-DUNICODE", "-D_UNICODE" };
+    const msvc_target = b.resolveTargetQuery(.{ .abi = .msvc });
+    const gnu_target = b.resolveTargetQuery(.{ .abi = .gnu });
+
+    // MSVC ABI: `verify.c` supplies `wmain`, `lib.zig` supplies `verify`.
+    const msvc_lib = b.addStaticLibrary(.{
+        .name = "toargv-msvc",
+        .root_source_file = .{ .path = "lib.zig" },
+        .target = msvc_target,
+        .optimize = optimize,
+    });
+    const msvc_exe = b.addExecutable(.{
+        .name = "verify-msvc",
+        .target = msvc_target,
+        .optimize = optimize,
+    });
+    msvc_exe.addCSourceFile(.{
+        .file = .{ .path = "verify.c" },
+        .flags = unicode_flags,
+    });
+    msvc_exe.linkLibrary(msvc_lib);
+    msvc_exe.linkLibC();
+
+    // GNU ABI: same sources, plus the MinGW unicode entry point setting.
+    const gnu_lib = b.addStaticLibrary(.{
+        .name = "toargv-gnu",
+        .root_source_file = .{ .path = "lib.zig" },
+        .target = gnu_target,
+        .optimize = optimize,
+    });
+    const gnu_exe = b.addExecutable(.{
+        .name = "verify-gnu",
+        .target = gnu_target,
+        .optimize = optimize,
+    });
+    gnu_exe.addCSourceFile(.{
+        .file = .{ .path = "verify.c" },
+        .flags = unicode_flags,
+    });
+    gnu_exe.mingw_unicode_entry_point = true;
+    gnu_exe.linkLibrary(gnu_lib);
+    gnu_exe.linkLibC();
+
+    // The fuzzer itself is built for the host and spawns the verify executables.
+    const fuzz_exe = b.addExecutable(.{
+        .name = "fuzz",
+        .root_source_file = .{ .path = "fuzz.zig" },
+        .target = b.host,
+        .optimize = optimize,
+    });
+
+    const max_iterations = b.option(u64, "iterations", "The max fuzz iterations (default: 100)") orelse 100;
+    const iterations_arg = std.fmt.allocPrint(b.allocator, "{}", .{max_iterations}) catch @panic("oom");
+
+    // Without an explicit seed, pick one here so both runs share it and it
+    // shows up in the command line of a failing run.
+    const seed = b.option(u64, "seed", "Seed to use for the PRNG (default: random)") orelse random_seed: {
+        var bytes: [8]u8 = undefined;
+        try std.posix.getrandom(&bytes);
+        break :random_seed std.mem.readInt(u64, &bytes, builtin.cpu.arch.endian());
+    };
+    const seed_arg = std.fmt.allocPrint(b.allocator, "{}", .{seed}) catch @panic("oom");
+
+    const msvc_run = b.addRunArtifact(fuzz_exe);
+    msvc_run.setName("fuzz-msvc");
+    msvc_run.addArtifactArg(msvc_exe);
+    msvc_run.addArgs(&.{ iterations_arg, seed_arg });
+    msvc_run.expectExitCode(0);
+
+    const gnu_run = b.addRunArtifact(fuzz_exe);
+    gnu_run.setName("fuzz-gnu");
+    gnu_run.addArtifactArg(gnu_exe);
+    gnu_run.addArgs(&.{ iterations_arg, seed_arg });
+    gnu_run.expectExitCode(0);
+
+    test_step.dependOn(&msvc_run.step);
+    test_step.dependOn(&gnu_run.step);
+}
test/standalone/windows_argv/fuzz.zig
@@ -0,0 +1,159 @@
+const std = @import("std");
+const builtin = @import("builtin");
+const windows = std.os.windows;
+const Allocator = std.mem.Allocator;
+
+/// Fuzz driver: repeatedly spawns the verify executable with randomly
+/// generated command lines and reports every run that exits non-zero.
+///
+/// Command-line arguments:
+/// - 1 (required): path to the verify executable
+/// - 2 (optional): max iterations; 0 (also the value used when omitted) means run forever
+/// - 3 (optional): PRNG seed; when omitted a random seed is chosen and printed
+///
+/// Returns `error.FoundDiscrepancies` if at least one run exited non-zero.
+pub fn main() !void {
+    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+    defer std.debug.assert(gpa.deinit() == .ok);
+    const allocator = gpa.allocator();
+
+    const args = try std.process.argsAlloc(allocator);
+    defer std.process.argsFree(allocator, args);
+
+    if (args.len < 2) return error.MissingArgs;
+
+    const verify_path_wtf8 = args[1];
+    const verify_path_w = try std.unicode.wtf8ToWtf16LeAllocZ(allocator, verify_path_wtf8);
+    defer allocator.free(verify_path_w);
+
+    const iterations: u64 = iterations: {
+        if (args.len < 3) break :iterations 0;
+        break :iterations try std.fmt.parseUnsigned(u64, args[2], 10);
+    };
+
+    var rand_seed = false;
+    const seed: u64 = seed: {
+        if (args.len < 4) {
+            rand_seed = true;
+            var buf: [8]u8 = undefined;
+            try std.posix.getrandom(&buf);
+            break :seed std.mem.readInt(u64, &buf, builtin.cpu.arch.endian());
+        }
+        break :seed try std.fmt.parseUnsigned(u64, args[3], 10);
+    };
+    var random = std.rand.DefaultPrng.init(seed);
+    const rand = random.random();
+
+    // If the seed was not given via the CLI, then output the
+    // randomly chosen seed so that this run can be reproduced
+    if (rand_seed) {
+        std.debug.print("rand seed: {}\n", .{seed});
+    }
+
+    var i: u64 = 0;
+    var errors: u64 = 0;
+    while (iterations == 0 or i < iterations) {
+        const cmd_line_w = try randomCommandLineW(allocator, rand);
+        defer allocator.free(cmd_line_w);
+
+        // avoid known difference for 0-length command lines
+        // (skipped command lines do not count as an iteration)
+        if (cmd_line_w.len == 0 or cmd_line_w[0] == '\x00') continue;
+
+        const exit_code = try spawnVerify(verify_path_w, cmd_line_w);
+        if (exit_code != 0) {
+            std.debug.print(">>> found discrepancy <<<\n", .{});
+            const cmd_line_wtf8 = try std.unicode.wtf16LeToWtf8Alloc(allocator, cmd_line_w);
+            defer allocator.free(cmd_line_wtf8);
+            std.debug.print("\"{}\"\n\n", .{std.zig.fmtEscapes(cmd_line_wtf8)});
+
+            errors += 1;
+        }
+
+        i += 1;
+    }
+    if (errors > 0) {
+        // we never get here if iterations is 0 so we don't have to worry about that case
+        std.debug.print("found {} discrepancies in {} iterations\n", .{ errors, iterations });
+        return error.FoundDiscrepancies;
+    }
+}
+
+/// Generates a random, 0-terminated command line of at most 256 little-endian
+/// UTF-16 code units. Each code unit is drawn from one of seven equally
+/// likely categories. Caller owns the returned slice.
+fn randomCommandLineW(allocator: Allocator, rand: std.rand.Random) ![:0]const u16 {
+    const Category = enum {
+        backslash,
+        quote,
+        space,
+        tab,
+        control,
+        printable,
+        non_ascii,
+    };
+
+    const unit_count = rand.uintAtMostBiased(u16, 256);
+    var units = try std.ArrayList(u16).initCapacity(allocator, unit_count);
+    errdefer units.deinit();
+
+    var remaining = unit_count;
+    while (remaining > 0) : (remaining -= 1) {
+        const unit: u16 = switch (rand.enumValue(Category)) {
+            .backslash => '\\',
+            .quote => '"',
+            .space => ' ',
+            .tab => '\t',
+            // 0x00..0x20, with the extra value 0x21 standing in for DEL.
+            .control => control: {
+                const byte = rand.uintAtMostBiased(u8, 0x21);
+                break :control if (byte == 0x21) '\x7F' else byte;
+            },
+            .printable => '!' + rand.uintAtMostBiased(u8, '~' - '!'),
+            .non_ascii => rand.intRangeAtMostBiased(u16, 0x80, 0xFFFF),
+        };
+        // Capacity for `unit_count` items was reserved up front.
+        units.appendAssumeCapacity(std.mem.nativeToLittle(u16, unit));
+    }
+
+    return units.toOwnedSliceSentinel(0);
+}
+
+/// Spawns `verify_path` with `cmd_line` as its command line, waits for the
+/// process to finish, and returns the exit code of the verify process.
+fn spawnVerify(verify_path: [:0]const u16, cmd_line: [:0]const u16) !windows.DWORD {
+    // Only stderr is handed to the child; stdin and stdout are left null.
+    const stderr_handle: ?windows.HANDLE = windows.GetStdHandle(windows.STD_ERROR_HANDLE) catch null;
+
+    var startup_info = windows.STARTUPINFOW{
+        .cb = @sizeOf(windows.STARTUPINFOW),
+        .lpReserved = null,
+        .lpDesktop = null,
+        .lpTitle = null,
+        .dwX = 0,
+        .dwY = 0,
+        .dwXSize = 0,
+        .dwYSize = 0,
+        .dwXCountChars = 0,
+        .dwYCountChars = 0,
+        .dwFillAttribute = 0,
+        .dwFlags = windows.STARTF_USESTDHANDLES,
+        .wShowWindow = 0,
+        .cbReserved2 = 0,
+        .lpReserved2 = null,
+        .hStdInput = null,
+        .hStdOutput = null,
+        .hStdError = stderr_handle,
+    };
+    var process_info: windows.PROCESS_INFORMATION = undefined;
+
+    try windows.CreateProcessW(
+        @constCast(verify_path.ptr),
+        @constCast(cmd_line.ptr),
+        null,
+        null,
+        windows.TRUE,
+        0,
+        null,
+        null,
+        &startup_info,
+        &process_info,
+    );
+    // Only the process handle is needed from here on.
+    windows.CloseHandle(process_info.hThread);
+
+    const process = process_info.hProcess;
+    defer windows.CloseHandle(process);
+
+    try windows.WaitForSingleObjectEx(process, windows.INFINITE, false);
+
+    var exit_code: windows.DWORD = undefined;
+    const ok = windows.kernel32.GetExitCodeProcess(process, &exit_code) != 0;
+    if (!ok) return error.UnableToGetExitCode;
+    return exit_code;
+}
test/standalone/windows_argv/lib.h
@@ -0,0 +1,8 @@
+#ifndef WINDOWS_ARGV_LIB_H
+#define WINDOWS_ARGV_LIB_H
+
+#include <windows.h>
+
+/*
+ * Implemented in lib.zig. Compares the argv received by the C entry point
+ * with the arguments Zig obtains from std.process.argsAlloc.
+ * Returns 1 on success (they match) and 0 on failure.
+ */
+int verify(int argc, wchar_t *argv[]);
+
+#endif
\ No newline at end of file
test/standalone/windows_argv/lib.zig
@@ -0,0 +1,59 @@
+const std = @import("std");
+
+/// C-callable check of the C runtime's `argv` against Zig's own argument
+/// splitting (see `testArgv`).
+///
+/// Returns 1 on success, 0 on failure
+export fn verify(argc: c_int, argv: [*]const [*:0]const u16) c_int {
+    const c_args = argv[0..@intCast(argc)];
+    if (testArgv(c_args)) |_| {
+        return 1;
+    } else |err| switch (err) {
+        error.ArgvMismatch => return 0,
+        error.OutOfMemory => @panic("oom"),
+        error.Overflow => @panic("bytes needed to contain args would overflow usize"),
+    }
+}
+
+/// Compares `expected_args` (WTF-16 strings) with the arguments returned by
+/// `std.process.argsAlloc`, after converting each expected argument to WTF-8.
+///
+/// On any difference, a report is printed via `std.debug.print` and
+/// `error.ArgvMismatch` is returned.
+fn testArgv(expected_args: []const [*:0]const u16) !void {
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    const actual_args = try std.process.argsAlloc(allocator);
+    // Scratch buffer reused for every WTF-16 -> WTF-8 conversion below.
+    var scratch = std.ArrayList(u8).init(allocator);
+
+    var matches = actual_args.len == expected_args.len;
+
+    // Compare the arguments that both lists have.
+    const common_len = @min(expected_args.len, actual_args.len);
+    for (0..common_len) |i| {
+        const actual = actual_args[i];
+        scratch.clearRetainingCapacity();
+        try std.unicode.wtf16LeToWtf8ArrayList(&scratch, std.mem.span(expected_args[i]));
+        if (std.mem.eql(u8, scratch.items, actual)) continue;
+
+        std.debug.print("{}: expected: \"{}\"\n", .{ i, std.zig.fmtEscapes(scratch.items) });
+        std.debug.print("{}: actual: \"{}\"\n", .{ i, std.zig.fmtEscapes(actual) });
+        matches = false;
+    }
+    if (matches) return;
+
+    // Report whatever is left over on either side.
+    for (expected_args[common_len..], common_len..) |extra, i| {
+        scratch.clearRetainingCapacity();
+        try std.unicode.wtf16LeToWtf8ArrayList(&scratch, std.mem.span(extra));
+        std.debug.print("{}: expected: \"{}\"\n", .{ i, std.zig.fmtEscapes(scratch.items) });
+    }
+    for (actual_args[common_len..], common_len..) |extra, i| {
+        std.debug.print("{}: actual: \"{}\"\n", .{ i, std.zig.fmtEscapes(extra) });
+    }
+
+    // Print the command line stored in the PEB so the failure can be reproduced.
+    const peb = std.os.windows.peb();
+    const raw_cmd_line: [*:0]u16 = @ptrCast(peb.ProcessParameters.CommandLine.Buffer);
+    scratch.clearRetainingCapacity();
+    try std.unicode.wtf16LeToWtf8ArrayList(&scratch, std.mem.span(raw_cmd_line));
+    std.debug.print("command line: \"{}\"\n", .{std.zig.fmtEscapes(scratch.items)});
+
+    // Print the expected argv as a Zig tuple literal.
+    std.debug.print("expected argv:\n", .{});
+    std.debug.print("&.{{\n", .{});
+    for (expected_args) |expected| {
+        scratch.clearRetainingCapacity();
+        try std.unicode.wtf16LeToWtf8ArrayList(&scratch, std.mem.span(expected));
+        std.debug.print(" \"{}\",\n", .{std.zig.fmtEscapes(scratch.items)});
+    }
+    std.debug.print("}}\n", .{});
+    return error.ArgvMismatch;
+}
test/standalone/windows_argv/README.md
@@ -0,0 +1,19 @@
+Tests that Zig's `std.process.ArgIteratorWindows` is compatible with both the MSVC and MinGW C runtimes' argv splitting algorithms.
+
+The method of testing is:
+- Compile a C file with `wmain` as its entry point
+- The C `wmain` calls a Zig-implemented `verify` function that takes the `argv` from `wmain` and compares it to the argv obtained from `std.process.argsAlloc` (which takes `kernel32.GetCommandLineW()` and splits it)
+- The compiled C program is spawned continuously as a child process by the implementation in `fuzz.zig` with randomly generated command lines
+ + On Windows, the 'application name' and the 'command line' are disjoint concepts. That is, you can spawn `foo.exe` but set the command line to `bar.exe`, and `CreateProcessW` will spawn `foo.exe` but `argv[0]` will be `bar.exe`. This quirk allows us to test arbitrary `argv[0]` values as well which otherwise wouldn't be possible.
+
+Note: This is intentionally testing against the C runtime argv splitting and *not* [`CommandLineToArgvW`](https://learn.microsoft.com/en-us/windows/win32/api/shellapi/nf-shellapi-commandlinetoargvw), since the C runtime argv splitting was updated in 2008 but `CommandLineToArgvW` still uses the pre-2008 algorithm (which differs in both `argv[0]` rules and `""`; see [here](https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULESDOC) for details)
+
+---
+
+In addition to being run during `zig build test-standalone`, this test can be run on its own via `zig build test` from within this directory.
+
+When run on its own:
+- `-Diterations=<num>` can be used to set the max fuzzing iterations, and `-Diterations=0` can be used to fuzz indefinitely
+- `-Dseed=<num>` can be used to set the PRNG seed for fuzz testing. If not provided, then the seed is chosen at random during `build.zig` compilation.
+
+On failure, the number of iterations and the seed can be seen in the failing command, e.g. in `path\to\fuzz.exe path\to\verify-msvc.exe 100 2780392459403250529`, the iteration count is `100` and the seed is `2780392459403250529`.
test/standalone/windows_argv/verify.c
@@ -0,0 +1,7 @@
+#include <windows.h>
+#include "lib.h"
+
+int wmain(int argc, wchar_t *argv[]) {
+    /* verify() returns non-zero on success; map that to exit code 0. */
+    return verify(argc, argv) ? 0 : 1;
+}
\ No newline at end of file
test/standalone/build.zig.zon
@@ -104,6 +104,9 @@
.windows_spawn = .{
.path = "windows_spawn",
},
+ .windows_argv = .{
+ .path = "windows_argv",
+ },
.self_exe_symlink = .{
.path = "self_exe_symlink",
},