Commit 8a97807d68

PhaseMage <phasemage@live.com>
2022-01-30 20:27:52
Full response file (*.rsp) support
I hit the "quotes in an RSP file" issue when trying to compile gRPC using "zig cc". As a fun exercise, I decided to see if I could fix it myself. I'm fully open to this code being flat-out rejected. Or I can take feedback to fix it up. This modifies (and renames) _ArgIteratorWindows_ in process.zig such that it works with arbitrary strings (or the contents of an RSP file). In main.zig, this new _ArgIteratorGeneral_ is used to address the "TODO" listed in _ClangArgIterator_. This change closes #4833. **Pros:** - It has the nice attribute of handling "RSP file" arguments in the same way it handles "cmd_line" arguments. - High Performance, minimal allocations - Fixed bug in previous _ArgIteratorWindows_, where final trailing backslashes in a command line were entirely dropped - Added a test case for the above bug - Harmonized the _ArgIteratorXxxx._initWithAllocator()_ and _next()_ interface across Windows/Posix/Wasi (Moved Windows errors to _initWithAllocator()_ rather than _next()_) - Likely perf benefit on Windows by doing _utf16leToUtf8AllocZ()_ only once for the entire cmd_line **Cons:** - Breaking Change in std library on Windows: Call _ArgIterator.initWithAllocator()_ instead of _ArgIterator.init()_ - PhaseMage is new with contributions to Zig, might need a lot of hand-holding - PhaseMage is a Windows person, non-Windows stuff will need to be double-checked **Testing Done:** - Wrote a few new test cases in process.zig - zig.exe build test -Dskip-release (no new failures seen) - zig cc now builds gRPC without error
1 parent 336aa3c
doc/docgen.zig
@@ -23,21 +23,21 @@ pub fn main() !void {
 
     const allocator = arena.allocator();
 
-    var args_it = process.args();
+    var args_it = try process.argsWithAllocator(allocator);
 
     if (!args_it.skip()) @panic("expected self arg");
 
-    const zig_exe = (try args_it.next(allocator)) orelse @panic("expected zig exe arg");
+    const zig_exe = args_it.next() orelse @panic("expected zig exe arg");
     defer allocator.free(zig_exe);
 
-    const in_file_name = (try args_it.next(allocator)) orelse @panic("expected input arg");
+    const in_file_name = args_it.next() orelse @panic("expected input arg");
     defer allocator.free(in_file_name);
 
-    const out_file_name = (try args_it.next(allocator)) orelse @panic("expected output arg");
+    const out_file_name = args_it.next() orelse @panic("expected output arg");
     defer allocator.free(out_file_name);
 
     var do_code_tests = true;
-    if (try args_it.next(allocator)) |arg| {
+    if (args_it.next()) |arg| {
         if (mem.eql(u8, arg, "--skip-code-tests")) {
             do_code_tests = false;
         } else {
lib/std/process.zig
@@ -203,6 +203,8 @@ pub const ArgIteratorPosix = struct {
     index: usize,
     count: usize,
 
+    pub const InitError = error{};
+
     pub fn init() ArgIteratorPosix {
         return ArgIteratorPosix{
             .index = 0,
@@ -299,195 +301,268 @@ pub const ArgIteratorWasi = struct {
     }
 };
 
-pub const ArgIteratorWindows = struct {
-    index: usize,
-    cmd_line: [*]const u16,
-
-    pub const NextError = error{ OutOfMemory, InvalidCmdLine };
+/// Optional parameters for `ArgIteratorGeneral`. `comments_supported`: a `#` found where an argument would start begins a comment that runs to the next newline.
+pub const ArgIteratorGeneralOptions = struct {
+    comments_supported: bool = false,
+};
 
-    pub fn init() ArgIteratorWindows {
-        return initWithCmdLine(os.windows.kernel32.GetCommandLineW());
-    }
+/// A general Iterator to parse a string into a set of arguments
+pub fn ArgIteratorGeneral(comptime options: ArgIteratorGeneralOptions) type {
+    return struct {
+        allocator: Allocator,
+        index: usize = 0,
+        cmd_line: []const u8,
+
+        /// Should the cmd_line field be free'd (using the allocator) on deinit()?
+        free_cmd_line_on_deinit: bool,
+
+        /// buffer MUST be long enough to hold the cmd_line plus a null terminator.
+        /// buffer will be free'd (using the allocator) on deinit()
+        buffer: []u8,
+        start: usize = 0,
+        end: usize = 0,
+
+        pub const Self = @This();
+
+        pub const InitError = error{OutOfMemory};
+        pub const InitUtf16leError = error{ OutOfMemory, InvalidCmdLine };
+
+        /// cmd_line_utf8 MUST remain valid and constant while using this instance.
+        /// It is only borrowed: deinit() frees the internal buffer but never cmd_line_utf8.
+        pub fn init(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self {
+            const buffer = try allocator.alloc(u8, cmd_line_utf8.len + 1);
+
+            return Self{
+                .allocator = allocator,
+                .cmd_line = cmd_line_utf8,
+                .free_cmd_line_on_deinit = false,
+                .buffer = buffer,
+            };
+        }
 
-    pub fn initWithCmdLine(cmd_line: [*]const u16) ArgIteratorWindows {
-        return ArgIteratorWindows{
-            .index = 0,
-            .cmd_line = cmd_line,
-        };
-    }
+        /// cmd_line_utf8 will be free'd (with the allocator) on deinit().
+        /// If this function returns an error, cmd_line_utf8 is NOT free'd and stays with the caller.
+        pub fn initTakeOwnership(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self {
+            const buffer = try allocator.alloc(u8, cmd_line_utf8.len + 1);
+
+            return Self{
+                .allocator = allocator,
+                .cmd_line = cmd_line_utf8,
+                .free_cmd_line_on_deinit = true,
+                .buffer = buffer,
+            };
+        }
 
-    fn getPointAtIndex(self: *ArgIteratorWindows) u16 {
-        // According to
-        // https://docs.microsoft.com/en-us/windows/win32/intl/using-byte-order-marks
-        // Microsoft uses UTF16-LE. So we just read assuming it's little
-        // endian.
-        return std.mem.littleToNative(u16, self.cmd_line[self.index]);
-    }
+        /// cmd_line_utf16le MUST be encoded UTF16-LE, and is converted to UTF-8 in an internal buffer
+        /// that is free'd on deinit(). Malformed surrogate sequences are reported as error.InvalidCmdLine.
+        pub fn initUtf16le(allocator: Allocator, cmd_line_utf16le: [*:0]const u16) InitUtf16leError!Self {
+            const utf16le_slice = mem.sliceTo(cmd_line_utf16le, 0);
+            const cmd_line = std.unicode.utf16leToUtf8Alloc(allocator, utf16le_slice) catch |err| switch (err) {
+                error.ExpectedSecondSurrogateHalf,
+                error.DanglingSurrogateHalf,
+                error.UnexpectedSecondSurrogateHalf,
+                => return error.InvalidCmdLine,
+
+                error.OutOfMemory => return error.OutOfMemory,
+            };
+            errdefer allocator.free(cmd_line);
+
+            const buffer = try allocator.alloc(u8, cmd_line.len + 1);
+
+            return Self{
+                .allocator = allocator,
+                .cmd_line = cmd_line,
+                .free_cmd_line_on_deinit = true,
+                .buffer = buffer,
+            };
+        }
 
-    /// You must free the returned memory when done.
-    pub fn next(self: *ArgIteratorWindows, allocator: Allocator) NextError!?[:0]u8 {
-        // march forward over whitespace
-        while (true) : (self.index += 1) {
-            const character = self.getPointAtIndex();
-            switch (character) {
-                0 => return null,
-                ' ', '\t' => continue,
-                else => break,
+        // Skips over whitespace in the cmd_line, and over `#` comments (if supported).
+        // Returns false if the end of cmd_line (or a 0 byte) is reached, true otherwise.
+        // cmd_line has no sentinel, so BOTH loops must treat index == cmd_line.len as a 0 byte.
+        fn skipWhitespace(self: *Self) bool {
+            while (true) : (self.index += 1) {
+                const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
+                switch (character) {
+                    0 => return false,
+                    ' ', '\t', '\r', '\n' => continue,
+                    '#' => {
+                        if (options.comments_supported) {
+                            while (true) : (self.index += 1) {
+                                switch (if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0) {
+                                    '\n' => break,
+                                    0 => return false,
+                                    else => continue,
+                                }
+                            }
+                            continue;
+                        } else {
+                            break;
+                        }
+                    },
+                    else => break,
+                }
             }
+            return true;
         }
 
-        return try self.internalNext(allocator);
-    }
+        pub fn skip(self: *Self) bool {
+            if (!self.skipWhitespace()) {
+                return false;
+            }
 
-    pub fn skip(self: *ArgIteratorWindows) bool {
-        // march forward over whitespace
-        while (true) : (self.index += 1) {
-            const character = self.getPointAtIndex();
-            switch (character) {
-                0 => return false,
-                ' ', '\t' => continue,
-                else => break,
+            var backslash_count: usize = 0;
+            var in_quote = false;
+            while (true) : (self.index += 1) {
+                const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
+                switch (character) {
+                    0 => return true,
+                    '"' => {
+                        const quote_is_real = backslash_count % 2 == 0;
+                        if (quote_is_real) {
+                            in_quote = !in_quote;
+                        }
+                    },
+                    '\\' => {
+                        backslash_count += 1;
+                    },
+                    ' ', '\t', '\r', '\n' => {
+                        if (!in_quote) {
+                            return true;
+                        }
+                        backslash_count = 0;
+                    },
+                    else => {
+                        backslash_count = 0;
+                        continue;
+                    },
+                }
             }
         }
 
-        var backslash_count: usize = 0;
-        var in_quote = false;
-        while (true) : (self.index += 1) {
-            const character = self.getPointAtIndex();
-            switch (character) {
-                0 => return true,
-                '"' => {
-                    const quote_is_real = backslash_count % 2 == 0;
-                    if (quote_is_real) {
-                        in_quote = !in_quote;
-                    }
-                },
-                '\\' => {
-                    backslash_count += 1;
-                },
-                ' ', '\t' => {
-                    if (!in_quote) {
-                        return true;
-                    }
-                    backslash_count = 0;
-                },
-                else => {
-                    backslash_count = 0;
-                    continue;
-                },
+        /// Returns the next argument as a slice of the internal buffer; it stays valid until deinit()
+        /// because every argument gets its own region of the buffer. Returns null when it reaches the end.
+        pub fn next(self: *Self) ?[:0]const u8 {
+            if (!self.skipWhitespace()) {
+                return null;
+            }
+
+            var backslash_count: usize = 0; // backslashes seen but not yet emitted
+            var in_quote = false;
+            while (true) : (self.index += 1) {
+                const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
+                switch (character) {
+                    0 => {
+                        self.emitBackslashes(backslash_count); // trailing backslashes are literal
+                        self.buffer[self.end] = 0;
+                        const token = self.buffer[self.start..self.end :0];
+                        self.end += 1;
+                        self.start = self.end;
+                        return token;
+                    },
+                    '"' => {
+                        const quote_is_real = backslash_count % 2 == 0; // odd count: the last backslash escapes the quote
+                        self.emitBackslashes(backslash_count / 2);
+                        backslash_count = 0;
+
+                        if (quote_is_real) {
+                            in_quote = !in_quote;
+                        } else {
+                            self.emitCharacter('"');
+                        }
+                    },
+                    '\\' => {
+                        backslash_count += 1;
+                    },
+                    ' ', '\t', '\r', '\n' => {
+                        self.emitBackslashes(backslash_count);
+                        backslash_count = 0;
+                        if (in_quote) {
+                            self.emitCharacter(character);
+                        } else {
+                            self.buffer[self.end] = 0;
+                            const token = self.buffer[self.start..self.end :0];
+                            self.end += 1;
+                            self.start = self.end;
+                            return token;
+                        }
+                    },
+                    else => {
+                        self.emitBackslashes(backslash_count);
+                        backslash_count = 0;
+                        self.emitCharacter(character);
+                    },
+                }
             }
         }
-    }
 
-    fn internalNext(self: *ArgIteratorWindows, allocator: Allocator) NextError![:0]u8 {
-        var buf = std.ArrayList(u16).init(allocator);
-        defer buf.deinit();
-
-        var backslash_count: usize = 0;
-        var in_quote = false;
-        while (true) : (self.index += 1) {
-            const character = self.getPointAtIndex();
-            switch (character) {
-                0 => {
-                    return convertFromWindowsCmdLineToUTF8(allocator, buf.items);
-                },
-                '"' => {
-                    const quote_is_real = backslash_count % 2 == 0;
-                    try emitBackslashes(&buf, backslash_count / 2);
-                    backslash_count = 0;
-
-                    if (quote_is_real) {
-                        in_quote = !in_quote;
-                    } else {
-                        try buf.append(std.mem.nativeToLittle(u16, '"'));
-                    }
-                },
-                '\\' => {
-                    backslash_count += 1;
-                },
-                ' ', '\t' => {
-                    try emitBackslashes(&buf, backslash_count);
-                    backslash_count = 0;
-                    if (in_quote) {
-                        try buf.append(std.mem.nativeToLittle(u16, character));
-                    } else {
-                        return convertFromWindowsCmdLineToUTF8(allocator, buf.items);
-                    }
-                },
-                else => {
-                    try emitBackslashes(&buf, backslash_count);
-                    backslash_count = 0;
-                    try buf.append(std.mem.nativeToLittle(u16, character));
-                },
+        fn emitBackslashes(self: *Self, emit_count: usize) void {
+            var remaining = emit_count; // literal backslashes still to append
+            while (remaining != 0) : (remaining -= 1) {
+                self.emitCharacter('\\');
             }
         }
-    }
 
-    fn convertFromWindowsCmdLineToUTF8(allocator: Allocator, buf: []u16) NextError![:0]u8 {
-        return std.unicode.utf16leToUtf8AllocZ(allocator, buf) catch |err| switch (err) {
-            error.ExpectedSecondSurrogateHalf,
-            error.DanglingSurrogateHalf,
-            error.UnexpectedSecondSurrogateHalf,
-            => return error.InvalidCmdLine,
+        fn emitCharacter(self: *Self, char: u8) void {
+            self.buffer[self.end] = char; // append to the argument currently being built
+            self.end += 1; // `end` is one past the last byte written
+        }
 
-            error.OutOfMemory => return error.OutOfMemory,
-        };
-    }
-    fn emitBackslashes(buf: *std.ArrayList(u16), emit_count: usize) !void {
-        var i: usize = 0;
-        while (i < emit_count) : (i += 1) {
-            try buf.append(std.mem.nativeToLittle(u16, '\\'));
+        /// Frees the iterator's internal buffer, and cmd_line too when the iterator owns it.
+        pub fn deinit(self: *Self) void {
+            const gpa = self.allocator;
+            gpa.free(self.buffer);
+            if (self.free_cmd_line_on_deinit) {
+                gpa.free(self.cmd_line);
+            }
         }
-    }
-};
+    };
+}
 
+/// Cross-platform command line argument iterator.
 pub const ArgIterator = struct {
     const InnerType = switch (builtin.os.tag) {
-        .windows => ArgIteratorWindows,
+        .windows => ArgIteratorGeneral(.{ .comments_supported = false }),
         .wasi => if (builtin.link_libc) ArgIteratorPosix else ArgIteratorWasi,
         else => ArgIteratorPosix,
     };
 
     inner: InnerType,
 
-    /// Initialize the args iterator.
+    /// Initialize the args iterator. Consider using initWithAllocator() instead
+    /// for cross-platform compatibility.
     pub fn init() ArgIterator {
         if (builtin.os.tag == .wasi) {
             @compileError("In WASI, use initWithAllocator instead.");
         }
+        if (builtin.os.tag == .windows) {
+            @compileError("In Windows, use initWithAllocator instead.");
+        }
 
         return ArgIterator{ .inner = InnerType.init() };
     }
 
-    pub const InitError = ArgIteratorWasi.InitError;
+    pub const InitError = switch (builtin.os.tag) {
+        .windows => InnerType.InitUtf16leError,
+        else => InnerType.InitError,
+    };
 
     /// You must deinitialize iterator's internal buffers by calling `deinit` when done.
     pub fn initWithAllocator(allocator: mem.Allocator) InitError!ArgIterator {
         if (builtin.os.tag == .wasi and !builtin.link_libc) {
             return ArgIterator{ .inner = try InnerType.init(allocator) };
         }
-
-        return ArgIterator{ .inner = InnerType.init() };
-    }
-
-    pub const NextError = ArgIteratorWindows.NextError;
-
-    /// You must free the returned memory when done.
-    pub fn next(self: *ArgIterator, allocator: Allocator) NextError!?[:0]u8 {
         if (builtin.os.tag == .windows) {
-            return self.inner.next(allocator);
-        } else {
-            return try allocator.dupeZ(u8, self.inner.next() orelse return null);
+            const cmd_line_w = os.windows.kernel32.GetCommandLineW();
+            return ArgIterator{ .inner = try InnerType.initUtf16le(allocator, cmd_line_w) };
         }
-    }
 
-    /// If you only are targeting posix you can call this and not need an allocator.
-    pub fn nextPosix(self: *ArgIterator) ?[:0]const u8 {
-        return self.inner.next();
+        return ArgIterator{ .inner = InnerType.init() };
     }
 
-    /// If you only are targeting WASI, you can call this and not need an allocator.
-    pub fn nextWasi(self: *ArgIterator) ?[:0]const u8 {
+    /// Get the next argument. Returns 'null' if we are at the end.
+    /// Returned slice is pointing to the iterator's internal buffer.
+    pub fn next(self: *ArgIterator) ?([:0]const u8) {
         return self.inner.next();
     }
 
@@ -500,13 +575,18 @@ pub const ArgIterator = struct {
     /// Call this to free the iterator's internal buffer if the iterator
     /// was created with `initWithAllocator` function.
     pub fn deinit(self: *ArgIterator) void {
-        // Unless we're targeting WASI, this is a no-op.
+        // Unless we're targeting WASI or Windows, this is a no-op.
         if (builtin.os.tag == .wasi and !builtin.link_libc) {
             self.inner.deinit();
         }
+
+        if (builtin.os.tag == .windows) {
+            self.inner.deinit();
+        }
     }
 };
 
+/// Use argsWithAllocator() for cross-platform code: ArgIterator.init() is a compile error on Windows and WASI.
 pub fn args() ArgIterator {
     return ArgIterator.init();
 }
@@ -518,12 +598,10 @@ pub fn argsWithAllocator(allocator: mem.Allocator) ArgIterator.InitError!ArgIter
 
 test "args iterator" {
     var ga = std.testing.allocator;
-    var it = if (builtin.os.tag == .wasi) try argsWithAllocator(ga) else args();
-    defer it.deinit(); // no-op unless WASI
-
-    const prog_name = (try it.next(ga)) orelse unreachable;
-    defer ga.free(prog_name);
+    var it = try argsWithAllocator(ga);
+    defer it.deinit(); // no-op unless WASI or Windows
 
+    const prog_name = it.next() orelse unreachable;
     const expected_suffix = switch (builtin.os.tag) {
         .wasi => "test.wasm",
         .windows => "test.exe",
@@ -533,14 +611,14 @@ test "args iterator" {
 
     try testing.expect(mem.eql(u8, expected_suffix, given_suffix));
     try testing.expect(it.skip()); // Skip over zig_exe_path, passed to the test runner
-    try testing.expect((try it.next(ga)) == null);
+    try testing.expect(it.next() == null);
     try testing.expect(!it.skip());
 }
 
 /// Caller must call argsFree on result.
 pub fn argsAlloc(allocator: mem.Allocator) ![][:0]u8 {
     // TODO refactor to only make 1 allocation.
-    var it = if (builtin.os.tag == .wasi) try argsWithAllocator(allocator) else args();
+    var it = try argsWithAllocator(allocator);
     defer it.deinit();
 
     var contents = std.ArrayList(u8).init(allocator);
@@ -549,8 +627,7 @@ pub fn argsAlloc(allocator: mem.Allocator) ![][:0]u8 {
     var slice_list = std.ArrayList(usize).init(allocator);
     defer slice_list.deinit();
 
-    while (try it.next(allocator)) |arg| {
-        defer allocator.free(arg);
+    while (it.next()) |arg| {
         try contents.appendSlice(arg[0 .. arg.len + 1]);
         try slice_list.append(arg.len);
     }
@@ -586,16 +663,17 @@ pub fn argsFree(allocator: mem.Allocator, args_alloc: []const [:0]u8) void {
     return allocator.free(aligned_allocated_buf);
 }
 
-test "windows arg parsing" {
-    const utf16Literal = std.unicode.utf8ToUtf16LeStringLiteral;
-    try testWindowsCmdLine(utf16Literal("a   b\tc d"), &[_][]const u8{ "a", "b", "c", "d" });
-    try testWindowsCmdLine(utf16Literal("\"abc\" d e"), &[_][]const u8{ "abc", "d", "e" });
-    try testWindowsCmdLine(utf16Literal("a\\\\\\b d\"e f\"g h"), &[_][]const u8{ "a\\\\\\b", "de fg", "h" });
-    try testWindowsCmdLine(utf16Literal("a\\\\\\\"b c d"), &[_][]const u8{ "a\\\"b", "c", "d" });
-    try testWindowsCmdLine(utf16Literal("a\\\\\\\\\"b c\" d e"), &[_][]const u8{ "a\\\\b c", "d", "e" });
-    try testWindowsCmdLine(utf16Literal("a   b\tc \"d f"), &[_][]const u8{ "a", "b", "c", "d f" });
-
-    try testWindowsCmdLine(utf16Literal("\".\\..\\zig-cache\\build\" \"bin\\zig.exe\" \".\\..\" \".\\..\\zig-cache\" \"--help\""), &[_][]const u8{
+test "general arg parsing" {
+    try testGeneralCmdLine("a   b\tc d", &[_][]const u8{ "a", "b", "c", "d" });
+    try testGeneralCmdLine("\"abc\" d e", &[_][]const u8{ "abc", "d", "e" });
+    try testGeneralCmdLine("a\\\\\\b d\"e f\"g h", &[_][]const u8{ "a\\\\\\b", "de fg", "h" });
+    try testGeneralCmdLine("a\\\\\\\"b c d", &[_][]const u8{ "a\\\"b", "c", "d" });
+    try testGeneralCmdLine("a\\\\\\\\\"b c\" d e", &[_][]const u8{ "a\\\\b c", "d", "e" });
+    try testGeneralCmdLine("a   b\tc \"d f", &[_][]const u8{ "a", "b", "c", "d f" });
+    try testGeneralCmdLine("j k l\\", &[_][]const u8{ "j", "k", "l\\" });
+    try testGeneralCmdLine("\"\" x y z\\\\", &[_][]const u8{ "", "x", "y", "z\\\\" });
+
+    try testGeneralCmdLine("\".\\..\\zig-cache\\build\" \"bin\\zig.exe\" \".\\..\" \".\\..\\zig-cache\" \"--help\"", &[_][]const u8{
         ".\\..\\zig-cache\\build",
         "bin\\zig.exe",
         ".\\..",
@@ -604,14 +682,52 @@ test "windows arg parsing" {
     });
 }
 
-fn testWindowsCmdLine(input_cmd_line: [*]const u16, expected_args: []const []const u8) !void {
-    var it = ArgIteratorWindows.initWithCmdLine(input_cmd_line);
+// Parses `input_cmd_line` with comments disabled and expects exactly `expected_args`, in order.
+fn testGeneralCmdLine(input_cmd_line: []const u8, expected_args: []const []const u8) !void {
+    const Iterator = ArgIteratorGeneral(.{ .comments_supported = false });
+    var it = try Iterator.init(std.testing.allocator, input_cmd_line);
+    defer it.deinit();
+    for (expected_args) |want| {
+        try testing.expectEqualStrings(want, it.next().?);
+    }
+    try testing.expect(it.next() == null);
+}
+
+test "response file arg parsing" {
+    try testResponseFileCmdLine( // newlines separate arguments; a trailing backslash is kept literally
+        \\a b
+        \\c d\
+    , &[_][]const u8{ "a", "b", "c", "d\\" });
+    try testResponseFileCmdLine("a b c d\\", &[_][]const u8{ "a", "b", "c", "d\\" });
+    // A `#` where an argument would start begins a comment; quotes and backslashes inside it are ignored.
+    try testResponseFileCmdLine(
+        \\j
+        \\ k l # this is a comment \\ \\\ \\\\ "none" "\\" "\\\"
+        \\ "m" #another comment
+        \\
+    , &[_][]const u8{ "j", "k", "l", "m" });
+    // Empty quoted arguments are kept; `#` inside quotes is literal; `\"` inside quotes is a literal quote.
+    try testResponseFileCmdLine(
+        \\ "" q ""
+        \\ "r s # t" "u\" v" #another comment
+        \\
+    , &[_][]const u8{ "", "q", "", "r s # t", "u\" v" });
+    // `#` in the middle or at the end of an argument is literal; trailing backslashes at end of input are kept.
+    try testResponseFileCmdLine(
+        \\ -l"advapi32" a# b#c d#
+        \\e\\\
+    , &[_][]const u8{ "-ladvapi32", "a#", "b#c", "d#", "e\\\\\\" });
+}
+
+fn testResponseFileCmdLine(input_cmd_line: []const u8, expected_args: []const []const u8) !void {
+    var it = try ArgIteratorGeneral(.{ .comments_supported = true })
+        .init(std.testing.allocator, input_cmd_line);
+    defer it.deinit();
     for (expected_args) |expected_arg| {
-        const arg = (it.next(std.testing.allocator) catch unreachable).?;
-        defer std.testing.allocator.free(arg);
+        const arg = it.next().?;
         try testing.expectEqualStrings(expected_arg, arg);
     }
-    try testing.expect((try it.next(std.testing.allocator)) == null);
+    try testing.expect(it.next() == null);
 }
 
 pub const UserInfo = struct {
lib/std/unicode.zig
@@ -568,8 +568,8 @@ pub fn utf16leToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) ![]u8
 
 /// Caller must free returned memory.
 pub fn utf16leToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) ![:0]u8 {
-    // optimistically guess that it will all be ascii.
-    var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len);
+    // optimistically guess that it will all be ascii (and allocate space for the null terminator)
+    var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len + 1);
     errdefer result.deinit();
     var out_index: usize = 0;
     var it = Utf16LeIterator.init(utf16le);
src/main.zig
@@ -4148,7 +4148,8 @@ pub const ClangArgIterator = struct {
     argv: []const []const u8,
     next_index: usize,
     root_args: ?*Args,
-    allocator: Allocator,
+    arg_iterator_response_file: ArgIteratorResponseFile,
+    arena: Allocator,
 
     pub const ZigEquivalent = enum {
         target,
@@ -4210,7 +4211,7 @@ pub const ClangArgIterator = struct {
         argv: []const []const u8,
     };
 
-    fn init(allocator: Allocator, argv: []const []const u8) ClangArgIterator {
+    fn init(arena: Allocator, argv: []const []const u8) ClangArgIterator {
         return .{
             .next_index = 2, // `zig cc foo` this points to `foo`
             .has_next = argv.len > 2,
@@ -4220,10 +4221,22 @@ pub const ClangArgIterator = struct {
             .other_args = undefined,
             .argv = argv,
             .root_args = null,
-            .allocator = allocator,
+            .arg_iterator_response_file = undefined,
+            .arena = arena,
         };
     }
 
+    const ArgIteratorResponseFile = process.ArgIteratorGeneral(.{ .comments_supported = true });
+
+    /// Initialize the arguments from a Response File ("*.rsp") read relative to the cwd; the returned iterator owns the file contents.
+    fn initArgIteratorResponseFile(allocator: Allocator, resp_file_path: []const u8) !ArgIteratorResponseFile {
+        const max_bytes = 10 * 1024 * 1024; // 10 MiB of command line arguments is a reasonable limit
+        const cmd_line = try fs.cwd().readFileAlloc(allocator, resp_file_path, max_bytes);
+        errdefer allocator.free(cmd_line); // still ours to free if initTakeOwnership fails
+
+        return ArgIteratorResponseFile.initTakeOwnership(allocator, cmd_line);
+    }
+
     fn next(self: *ClangArgIterator) !void {
         assert(self.has_next);
         assert(self.next_index < self.argv.len);
@@ -4239,31 +4252,25 @@ pub const ClangArgIterator = struct {
 
             // This is a "compiler response file". We must parse the file and treat its
             // contents as command line parameters.
-            const allocator = self.allocator;
-            const max_bytes = 10 * 1024 * 1024; // 10 MiB of command line arguments is a reasonable limit
+            const arena = self.arena;
             const resp_file_path = arg[1..];
-            const resp_contents = fs.cwd().readFileAlloc(allocator, resp_file_path, max_bytes) catch |err| {
+
+            self.arg_iterator_response_file =
+                initArgIteratorResponseFile(arena, resp_file_path) catch |err| {
                 fatal("unable to read response file '{s}': {s}", .{ resp_file_path, @errorName(err) });
             };
-            defer allocator.free(resp_contents);
-            // TODO is there a specification for this file format? Let's find it and make this parsing more robust
-            // at the very least I'm guessing this needs to handle quotes and `#` comments.
-            var it = mem.tokenize(u8, resp_contents, " \t\r\n");
-            var resp_arg_list = std.ArrayList([]const u8).init(allocator);
+            // NOTE: The ArgIteratorResponseFile returns tokens from next() that are slices of an
+            // internal buffer. This internal buffer is arena allocated, so it is not cleaned up here.
+
+            var resp_arg_list = std.ArrayList([]const u8).init(arena);
             defer resp_arg_list.deinit();
             {
-                errdefer {
-                    for (resp_arg_list.items) |item| {
-                        allocator.free(mem.span(item));
-                    }
+                while (self.arg_iterator_response_file.next()) |token| {
+                    try resp_arg_list.append(token);
                 }
-                while (it.next()) |token| {
-                    const dupe_token = try allocator.dupeZ(u8, token);
-                    errdefer allocator.free(dupe_token);
-                    try resp_arg_list.append(dupe_token);
-                }
-                const args = try allocator.create(Args);
-                errdefer allocator.destroy(args);
+
+                const args = try arena.create(Args);
+                errdefer arena.destroy(args);
                 args.* = .{
                     .next_index = self.next_index,
                     .argv = self.argv,
@@ -4284,6 +4291,7 @@ pub const ClangArgIterator = struct {
             arg = mem.span(self.argv[self.next_index]);
             self.incrementArgIndex();
         }
+
         if (mem.eql(u8, arg, "-") or !mem.startsWith(u8, arg, "-")) {
             self.zig_equivalent = .positional;
             self.only_arg = arg;
@@ -4383,13 +4391,13 @@ pub const ClangArgIterator = struct {
     }
 
     fn resolveRespFileArgs(self: *ClangArgIterator) void {
-        const allocator = self.allocator;
+        const arena = self.arena;
         if (self.next_index >= self.argv.len) {
             if (self.root_args) |root_args| {
                 self.next_index = root_args.next_index;
                 self.argv = root_args.argv;
 
-                allocator.destroy(root_args);
+                arena.destroy(root_args);
                 self.root_args = null;
             }
             if (self.next_index >= self.argv.len) {
test/cli.zig
@@ -11,18 +11,17 @@ pub fn main() !void {
     var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
     defer arena.deinit();
 
-    var arg_it = process.args();
+    a = arena.allocator();
+    var arg_it = try process.argsWithAllocator(a);
 
     // skip my own exe name
     _ = arg_it.skip();
 
-    a = arena.allocator();
-
-    const zig_exe_rel = (try arg_it.next(a)) orelse {
+    const zig_exe_rel = arg_it.next() orelse {
         std.debug.print("Expected first argument to be path to zig compiler\n", .{});
         return error.InvalidArgs;
     };
-    const cache_root = (try arg_it.next(a)) orelse {
+    const cache_root = arg_it.next() orelse {
         std.debug.print("Expected second argument to be cache root directory path\n", .{});
         return error.InvalidArgs;
     };
test/compare_output.zig
@@ -291,7 +291,9 @@ pub fn addCases(cases: *tests.CompareOutputContext) void {
         \\    stdout.print("before\n", .{}) catch unreachable;
         \\    defer stdout.print("defer1\n", .{}) catch unreachable;
         \\    defer stdout.print("defer2\n", .{}) catch unreachable;
-        \\    var args_it = @import("std").process.args();
+        \\    var arena = @import("std").heap.ArenaAllocator.init(@import("std").testing.allocator);
+        \\    defer arena.deinit();
+        \\    var args_it = @import("std").process.argsWithAllocator(arena.allocator()) catch unreachable;
         \\    if (args_it.skip() and !args_it.skip()) return;
         \\    defer stdout.print("defer3\n", .{}) catch unreachable;
         \\    stdout.print("after\n", .{}) catch unreachable;
@@ -358,11 +360,13 @@ pub fn addCases(cases: *tests.CompareOutputContext) void {
             \\const allocator = std.testing.allocator;
             \\
             \\pub fn main() !void {
-            \\    var args_it = std.process.args();
+            \\    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
+            \\    defer arena.deinit();
+            \\    var args_it = try std.process.argsWithAllocator(arena.allocator());
             \\    const stdout = io.getStdOut().writer();
             \\    var index: usize = 0;
             \\    _ = args_it.skip();
-            \\    while (try args_it.next(allocator)) |arg| : (index += 1) {
+            \\    while (args_it.next()) |arg| : (index += 1) {
             \\        try stdout.print("{}: {s}\n", .{index, arg});
             \\    }
             \\}
@@ -396,11 +400,13 @@ pub fn addCases(cases: *tests.CompareOutputContext) void {
             \\const allocator = std.testing.allocator;
             \\
             \\pub fn main() !void {
-            \\    var args_it = std.process.args();
+            \\    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
+            \\    defer arena.deinit();
+            \\    var args_it = try std.process.argsWithAllocator(arena.allocator());
             \\    const stdout = io.getStdOut().writer();
             \\    var index: usize = 0;
             \\    _ = args_it.skip();
-            \\    while (try args_it.next(allocator)) |arg| : (index += 1) {
+            \\    while (args_it.next()) |arg| : (index += 1) {
             \\        try stdout.print("{}: {s}\n", .{index, arg});
             \\    }
             \\}