Commit b61a6ec8a6

Andrew Kelley <superjoe30@gmail.com>
2017-10-11 16:16:13
implement command line argument parsing for windows
See #302
1 parent 717e791
example/cat/main.zig
@@ -4,11 +4,12 @@ const mem = std.mem;
 const os = std.os;
 
 pub fn main() -> %void {
-    const exe = os.args.at(0);
+    const allocator = &std.debug.global_allocator;
+    var args_it = os.args();
+    const exe = %return unwrapArg(??args_it.next(allocator));
     var catted_anything = false;
-    var arg_i: usize = 1;
-    while (arg_i < os.args.count()) : (arg_i += 1) {
-        const arg = os.args.at(arg_i);
+    while (args_it.next(allocator)) |arg_or_err| {
+        const arg = %return unwrapArg(arg_or_err);
         if (mem.eql(u8, arg, "-")) {
             catted_anything = true;
             %return cat_stream(&io.stdin);
@@ -55,3 +56,10 @@ fn cat_stream(is: &io.InStream) -> %void {
         };
     }
 }
+
+/// Passes a successfully parsed argument through unchanged. If `arg` holds
+/// an error, the error is reported on stderr and then returned to the caller.
+fn unwrapArg(arg: %[]u8) -> %[]u8 {
+    const parsed = arg %% |parse_err| {
+        %%io.stderr.printf("Unable to parse command line: {}\n", parse_err);
+        return parse_err;
+    };
+    return parsed;
+}
std/os/windows/index.zig
@@ -17,7 +17,7 @@ pub extern "kernel32" stdcallcc fn DeleteFileA(lpFileName: LPCSTR) -> bool;
 
 pub extern "kernel32" stdcallcc fn ExitProcess(exit_code: UINT) -> noreturn;
 
-pub extern "kernel32" stdcallcc fn GetCommandLine() -> LPTSTR;
+/// Binding for kernel32's `GetCommandLineA`. Used by `ArgIteratorWindows.init`
+/// as the source of the command line string to parse.
+pub extern "kernel32" stdcallcc fn GetCommandLineA() -> LPSTR;
 
 pub extern "kernel32" stdcallcc fn GetConsoleMode(in_hConsoleHandle: HANDLE, out_lpMode: &DWORD) -> bool;
 
std/os/index.zig
@@ -1,5 +1,7 @@
 const builtin = @import("builtin");
 const Os = builtin.Os;
+const is_windows = builtin.os == Os.windows;
+
 pub const windows = @import("windows/index.zig");
 pub const darwin = @import("darwin.zig");
 pub const linux = @import("linux.zig");
@@ -37,6 +39,7 @@ const cstr = @import("../cstr.zig");
 const io = @import("../io.zig");
 const base64 = @import("../base64.zig");
 const ArrayList = @import("../array_list.zig").ArrayList;
+const Buffer = @import("../buffer.zig").Buffer;
 
 error Unexpected;
 error SystemResources;
@@ -513,18 +516,6 @@ pub fn getEnv(key: []const u8) -> ?[]const u8 {
     return null;
 }
 
-pub const args = struct {
-    pub var raw: []&u8 = undefined;
-
-    pub fn count() -> usize {
-        return raw.len;
-    }
-    pub fn at(i: usize) -> []const u8 {
-        const s = raw[i];
-        return cstr.toSlice(s);
-    }
-};
-
 /// Caller must free the returned memory.
 pub fn getCwd(allocator: &Allocator) -> %[]u8 {
     switch (builtin.os) {
@@ -1144,6 +1135,233 @@ pub fn posix_setregid(rgid: u32, egid: u32) -> %void {
     };
 }
 
+/// Walks the argument vector stored in `raw`, front to back, without
+/// allocating.
+pub const ArgIteratorPosix = struct {
+    /// Position in `raw` of the argument the next call will yield.
+    index: usize,
+    /// Number of arguments, captured from `raw.len` by `init`.
+    count: usize,
+
+    /// Creates an iterator positioned at the first entry of `raw`.
+    pub fn init() -> ArgIteratorPosix {
+        return ArgIteratorPosix {
+            .count = raw.len,
+            .index = 0,
+        };
+    }
+
+    /// Yields the next argument as a slice, or `null` once every argument
+    /// has been consumed.
+    pub fn next(self: &ArgIteratorPosix) -> ?[]const u8 {
+        const current = self.index;
+        if (!self.skip())
+            return null;
+
+        return cstr.toSlice(raw[current]);
+    }
+
+    /// Steps over one argument without returning it.
+    /// Returns `false` if there was nothing left to step over.
+    pub fn skip(self: &ArgIteratorPosix) -> bool {
+        const has_more = self.index != self.count;
+        if (has_more) {
+            self.index += 1;
+        }
+        return has_more;
+    }
+
+    /// Public only so that zig's startup code can assign it; not meant to
+    /// be used by anything else.
+    pub var raw: []&u8 = undefined;
+};
+
+/// Iterator over the arguments contained in a Windows command line string.
+///
+/// Parsing rules, as implemented below:
+///  * arguments are separated by spaces or tabs that are not inside quotes
+///  * 2n backslashes followed by `"` yield n backslashes and a real quote
+///  * 2n+1 backslashes followed by `"` yield n backslashes and a literal `"`
+///  * backslashes that are not followed by `"` are kept literally
+///  * real quotes are dropped from the output, except a final unmatched one,
+///    which is kept as a literal `"`
+pub const ArgIteratorWindows = struct {
+    /// Offset of the next unparsed byte in `cmd_line`.
+    index: usize,
+    /// Null-terminated command line being parsed.
+    cmd_line: &const u8,
+    /// Length of the run of backslashes immediately preceding `index`.
+    backslash_count: usize,
+    /// Initialized to `false`; not read by any method of this struct.
+    in_quote: bool,
+    /// Total number of unescaped quotes in `cmd_line`, computed once by `countQuotes`.
+    quote_count: usize,
+    /// Number of unescaped quotes consumed so far.
+    seen_quote_count: usize,
+
+    /// Creates an iterator over the string returned by `GetCommandLineA`.
+    pub fn init() -> ArgIteratorWindows {
+        return initWithCmdLine(windows.GetCommandLineA());
+    }
+
+    /// Creates an iterator over `cmd_line`, which must be null-terminated
+    /// because every scanning loop below stops only at a 0 byte.
+    pub fn initWithCmdLine(cmd_line: &const u8) -> ArgIteratorWindows {
+        return ArgIteratorWindows {
+            .index = 0,
+            .cmd_line = cmd_line,
+            .backslash_count = 0,
+            .in_quote = false,
+            .quote_count = countQuotes(cmd_line),
+            .seen_quote_count = 0,
+        };
+    }
+
+    /// Returns `null` when the command line is exhausted, otherwise the next
+    /// argument or the error produced while building it.
+    /// You must free the returned memory when done.
+    pub fn next(self: &ArgIteratorWindows, allocator: &Allocator) -> ?%[]u8 {
+        // march forward over whitespace
+        while (true) : (self.index += 1) {
+            const byte = self.cmd_line[self.index];
+            switch (byte) {
+                0 => return null,
+                ' ', '\t' => continue,
+                else => break,
+            }
+        }
+
+        return self.internalNext(allocator);
+    }
+
+    /// Parses past one argument without capturing it.
+    /// Returns `true` if an argument was skipped, `false` if we are at the end.
+    pub fn skip(self: &ArgIteratorWindows) -> bool {
+        // march forward over whitespace
+        while (true) : (self.index += 1) {
+            const byte = self.cmd_line[self.index];
+            switch (byte) {
+                0 => return false,
+                ' ', '\t' => continue,
+                else => break,
+            }
+        }
+
+        // The backslash run must be reset on every byte that is not a
+        // backslash, exactly as `internalNext` does through `emitBackslashes`.
+        // Otherwise backslashes from earlier in the argument (e.g. path
+        // separators) would change how a later quote is classified, and a
+        // stale count would leak into the next call to `next`.
+        while (true) : (self.index += 1) {
+            const byte = self.cmd_line[self.index];
+            switch (byte) {
+                0 => {
+                    self.backslash_count = 0;
+                    return true;
+                },
+                '"' => {
+                    const quote_is_real = self.backslash_count % 2 == 0;
+                    self.backslash_count = 0;
+                    if (quote_is_real) {
+                        self.seen_quote_count += 1;
+                    }
+                },
+                '\\' => {
+                    self.backslash_count += 1;
+                },
+                ' ', '\t' => {
+                    self.backslash_count = 0;
+                    // Whitespace ends the argument unless we are inside a
+                    // quoted region that still has a closing quote ahead.
+                    if (self.seen_quote_count % 2 == 0 or self.seen_quote_count == self.quote_count) {
+                        return true;
+                    }
+                },
+                else => {
+                    self.backslash_count = 0;
+                },
+            }
+        }
+    }
+
+    /// Builds the argument that starts at `self.index`. The caller has
+    /// already advanced past any leading whitespace.
+    fn internalNext(self: &ArgIteratorWindows, allocator: &Allocator) -> %[]u8 {
+        var buf = %return Buffer.initSize(allocator, 0);
+        defer buf.deinit();
+
+        while (true) : (self.index += 1) {
+            const byte = self.cmd_line[self.index];
+            switch (byte) {
+                0 => {
+                    // Backslashes at the very end of the command line are
+                    // not followed by a quote, so they are literal.
+                    %return self.emitBackslashes(&buf, self.backslash_count);
+                    return buf.toOwnedSlice();
+                },
+                '"' => {
+                    const quote_is_real = self.backslash_count % 2 == 0;
+                    %return self.emitBackslashes(&buf, self.backslash_count / 2);
+
+                    if (quote_is_real) {
+                        self.seen_quote_count += 1;
+                        // A final quote with no partner is kept literally.
+                        if (self.seen_quote_count == self.quote_count and self.seen_quote_count % 2 == 1) {
+                            %return buf.appendByte('"');
+                        }
+                    } else {
+                        %return buf.appendByte('"');
+                    }
+                },
+                '\\' => {
+                    self.backslash_count += 1;
+                },
+                ' ', '\t' => {
+                    %return self.emitBackslashes(&buf, self.backslash_count);
+                    // Inside a quoted region that will be closed later,
+                    // whitespace is part of the argument.
+                    if (self.seen_quote_count % 2 == 1 and self.seen_quote_count != self.quote_count) {
+                        %return buf.appendByte(byte);
+                    } else {
+                        return buf.toOwnedSlice();
+                    }
+                },
+                else => {
+                    %return self.emitBackslashes(&buf, self.backslash_count);
+                    %return buf.appendByte(byte);
+                },
+            }
+        }
+    }
+
+    /// Appends `emit_count` backslashes to `buf` and resets the pending
+    /// backslash run to zero.
+    fn emitBackslashes(self: &ArgIteratorWindows, buf: &Buffer, emit_count: usize) -> %void {
+        self.backslash_count = 0;
+        var i: usize = 0;
+        while (i < emit_count) : (i += 1) {
+            %return buf.appendByte('\\');
+        }
+    }
+
+    /// Counts the quotes in `cmd_line` that are preceded by an even number
+    /// of backslashes, i.e. the quotes that are not escaped.
+    fn countQuotes(cmd_line: &const u8) -> usize {
+        var result: usize = 0;
+        var backslash_count: usize = 0;
+        var index: usize = 0;
+        while (true) : (index += 1) {
+            const byte = cmd_line[index];
+            switch (byte) {
+                0 => return result,
+                '\\' => backslash_count += 1,
+                '"' => {
+                    result += 1 - (backslash_count % 2);
+                    backslash_count = 0;
+                },
+                else => {
+                    backslash_count = 0;
+                },
+            }
+        }
+    }
+
+};
+
+/// Command line argument iterator for the target OS. Wraps
+/// `ArgIteratorWindows` when targeting Windows and `ArgIteratorPosix`
+/// otherwise.
+pub const ArgIterator = struct {
+    inner: if (is_windows) ArgIteratorWindows else ArgIteratorPosix,
+
+    /// Creates an iterator positioned before the first argument.
+    pub fn init() -> ArgIterator {
+        return ArgIterator {
+            .inner = if (is_windows) ArgIteratorWindows.init() else ArgIteratorPosix.init(),
+        };
+    }
+
+    /// Returns `null` after the last argument. On posix targets the argument
+    /// is duplicated with `allocator` so both targets return owned memory.
+    /// You must free the returned memory when done.
+    pub fn next(self: &ArgIterator, allocator: &Allocator) -> ?%[]u8 {
+        if (is_windows) {
+            return self.inner.next(allocator);
+        } else {
+            const borrowed = self.inner.next() ?? return null;
+            return mem.dupe(allocator, u8, borrowed);
+        }
+    }
+
+    /// Allocation-free variant of `next` for code that only targets posix.
+    pub fn nextPosix(self: &ArgIterator) -> ?[]const u8 {
+        return self.inner.next();
+    }
+
+    /// Advances past one argument without capturing it.
+    /// Returns `true` if an argument was skipped, `false` if none were left.
+    pub fn skip(self: &ArgIterator) -> bool {
+        return self.inner.skip();
+    }
+};
+
+/// Returns a freshly initialized `ArgIterator`. The first item it yields is
+/// whatever the platform stores first (callers in this commit treat it as the
+/// executable name).
+pub fn args() -> ArgIterator {
+    return ArgIterator.init();
+}
+
+test "windows arg parsing" {
+    // Runs of spaces and tabs separate arguments.
+    testWindowsCmdLine(c"a   b\tc d", [][]const u8{"a", "b", "c", "d"});
+    // Surrounding quotes are removed.
+    testWindowsCmdLine(c"\"abc\" d e", [][]const u8{"abc", "d", "e"});
+    // Backslashes not followed by a quote are literal; a quoted region in
+    // the middle of an argument keeps its whitespace.
+    testWindowsCmdLine(c"a\\\\\\b d\"e f\"g h", [][]const u8{"a\\\\\\b", "de fg", "h"});
+    // Odd number of backslashes before a quote: half of them (rounded down)
+    // plus a literal quote.
+    testWindowsCmdLine(c"a\\\\\\\"b c d", [][]const u8{"a\\\"b", "c", "d"});
+    // Even number of backslashes before a quote: half of them, and the
+    // quote opens a quoted region.
+    testWindowsCmdLine(c"a\\\\\\\\\"b c\" d e", [][]const u8{"a\\\\b c", "d", "e"});
+    // A final quote without a partner is kept literally and does not group
+    // the following whitespace.
+    testWindowsCmdLine(c"a   b\tc \"d f", [][]const u8{"a", "b", "c", "\"d", "f"});
+}
+
+/// Parses `input_cmd_line` with `ArgIteratorWindows` and asserts that it
+/// yields exactly `expected_args`, in order, followed by `null`.
+fn testWindowsCmdLine(input_cmd_line: &const u8, expected_args: []const []const u8) {
+    var arg_iter = ArgIteratorWindows.initWithCmdLine(input_cmd_line);
+    for (expected_args) |want| {
+        const maybe_arg = arg_iter.next(&debug.global_allocator);
+        const got = %%??maybe_arg;
+        assert(mem.eql(u8, got, want));
+    }
+    // Nothing may remain once every expected argument has been seen.
+    assert(arg_iter.next(&debug.global_allocator) == null);
+}
+
 test "std.os" {
     _ = @import("child_process.zig");
     _ = @import("darwin_errno.zig");
std/special/bootstrap.zig
@@ -52,7 +52,7 @@ fn posixCallMainAndExit() -> noreturn {
 }
 
 fn callMain(argc: usize, argv: &&u8, envp: &?&u8) -> %void {
-    std.os.args.raw = argv[0..argc];
+    std.os.ArgIteratorPosix.raw = argv[0..argc];
 
     var env_count: usize = 0;
     while (envp[env_count] != null) : (env_count += 1) {}
std/special/build_runner.zig
@@ -10,37 +10,7 @@ const ArrayList = std.ArrayList;
 error InvalidArgs;
 
 pub fn main() -> %void {
-    var arg_i: usize = 1;
-
-    const zig_exe = {
-        if (arg_i >= os.args.count()) {
-            %%io.stderr.printf("Expected first argument to be path to zig compiler\n");
-            return error.InvalidArgs;
-        }
-        const result = os.args.at(arg_i);
-        arg_i += 1;
-        result
-    };
-
-    const build_root = {
-        if (arg_i >= os.args.count()) {
-            %%io.stderr.printf("Expected second argument to be build root directory path\n");
-            return error.InvalidArgs;
-        }
-        const result = os.args.at(arg_i);
-        arg_i += 1;
-        result
-    };
-
-    const cache_root = {
-        if (arg_i >= os.args.count()) {
-            %%io.stderr.printf("Expected third argument to be cache root directory path\n");
-            return error.InvalidArgs;
-        }
-        const result = os.args.at(arg_i);
-        arg_i += 1;
-        result
-    };
+    var arg_it = os.args();
 
     // TODO use a more general purpose allocator here
     var inc_allocator = %%mem.IncrementingAllocator.init(20 * 1024 * 1024);
@@ -48,6 +18,23 @@ pub fn main() -> %void {
 
     const allocator = &inc_allocator.allocator;
 
+
+    // skip my own exe name
+    _ = arg_it.skip();
+
+    const zig_exe = %return unwrapArg(arg_it.next(allocator) ?? {
+        %%io.stderr.printf("Expected first argument to be path to zig compiler\n");
+        return error.InvalidArgs;
+    });
+    const build_root = %return unwrapArg(arg_it.next(allocator) ?? {
+        %%io.stderr.printf("Expected second argument to be build root directory path\n");
+        return error.InvalidArgs;
+    });
+    const cache_root = %return unwrapArg(arg_it.next(allocator) ?? {
+        %%io.stderr.printf("Expected third argument to be cache root directory path\n");
+        return error.InvalidArgs;
+    });
+
     var builder = Builder.init(allocator, zig_exe, build_root, cache_root);
     defer builder.deinit();
 
@@ -55,8 +42,8 @@ pub fn main() -> %void {
 
     var prefix: ?[]const u8 = null;
 
-    while (arg_i < os.args.count()) : (arg_i += 1) {
-        const arg = os.args.at(arg_i);
+    while (arg_it.next(allocator)) |err_or_arg| {
+        const arg = %return unwrapArg(err_or_arg);
         if (mem.startsWith(u8, arg, "-D")) {
             const option_contents = arg[2..];
             if (option_contents.len == 0) {
@@ -76,10 +63,12 @@ pub fn main() -> %void {
             if (mem.eql(u8, arg, "--verbose")) {
                 builder.verbose = true;
             } else if (mem.eql(u8, arg, "--help")) {
-                 return usage(&builder, false, &io.stdout);
-            } else if (mem.eql(u8, arg, "--prefix") and arg_i + 1 < os.args.count()) {
-                 arg_i += 1;
-                 prefix = os.args.at(arg_i);
+                return usage(&builder, false, &io.stdout);
+            } else if (mem.eql(u8, arg, "--prefix")) {
+                prefix = %return unwrapArg(arg_it.next(allocator) ?? {
+                    %%io.stderr.printf("Expected argument after --prefix\n\n");
+                    return usage(&builder, false, &io.stderr);
+                });
             } else {
                 %%io.stderr.printf("Unrecognized argument: {}\n\n", arg);
                 return usage(&builder, false, &io.stderr);
@@ -151,3 +140,10 @@ fn usage(builder: &Builder, already_ran_build: bool, out_stream: &io.OutStream)
     if (out_stream == &io.stderr)
         return error.InvalidArgs;
 }
+
+/// Passes a successfully parsed argument through unchanged. If `arg` holds
+/// an error, the error is reported on stderr and then returned to the caller.
+fn unwrapArg(arg: %[]u8) -> %[]u8 {
+    const parsed = arg %% |parse_err| {
+        %%io.stderr.printf("Unable to parse command line: {}\n", parse_err);
+        return parse_err;
+    };
+    return parsed;
+}
std/array_list.zig
@@ -14,6 +14,7 @@ pub fn ArrayList(comptime T: type) -> type{
         len: usize,
         allocator: &Allocator,
 
+        /// Deinitialize with `deinit` or use `toOwnedSlice`.
         pub fn init(allocator: &Allocator) -> Self {
             Self {
                 .items = []T{},
@@ -34,6 +35,25 @@ pub fn ArrayList(comptime T: type) -> type{
             return l.items[0..l.len];
         }
 
+        /// Adopts `slice` as the list's storage without copying. Every
+        /// element of `slice` counts as an item of the new list. The slice
+        /// must have been allocated with `allocator`, and the list owns it
+        /// from now on.
+        /// Deinitialize with `deinit` or use `toOwnedSlice`.
+        pub fn fromOwnedSlice(allocator: &Allocator, slice: []T) -> Self {
+            const adopted = Self {
+                .allocator = allocator,
+                .items = slice,
+                .len = slice.len,
+            };
+            return adopted;
+        }
+
+        /// Hands the stored items over to the caller, who now owns the
+        /// memory. The allocation is shrunk to `len` items first, and the
+        /// list is reset to a freshly initialized, empty state.
+        pub fn toOwnedSlice(self: &Self) -> []T {
+            // Copy the allocator out before `*self` is overwritten below.
+            const list_allocator = self.allocator;
+            const owned_items = list_allocator.shrink(T, self.items, self.len);
+            *self = Self.init(list_allocator);
+            return owned_items;
+        }
+
         pub fn append(l: &Self, item: &const T) -> %void {
             const new_item_ptr = %return l.addOne();
             *new_item_ptr = *item;
std/buffer.zig
@@ -38,6 +38,27 @@ pub const Buffer = struct {
         return Buffer.init(buffer.list.allocator, buffer.toSliceConst());
     }
 
+    /// Buffer takes ownership of the passed in slice. The slice must have been
+    /// allocated with `allocator`.
+    /// Must deinitialize with deinit.
+    ///
+    /// A 0 byte is appended after the slice contents, and that append can
+    /// fail, so its error is returned to the caller instead of being
+    /// discarded. Nothing is freed on the error path, so the caller remains
+    /// responsible for `slice` in that case (assuming a failed grow leaves
+    /// the original allocation intact - TODO confirm against the allocator).
+    pub fn fromOwnedSlice(allocator: &Allocator, slice: []u8) -> %Buffer {
+        var self = Buffer {
+            .list = ArrayList(u8).fromOwnedSlice(allocator, slice),
+        };
+        %return self.list.append(0);
+        return self;
+    }
+
+    /// Releases the buffer's bytes to the caller, who now owns the memory.
+    /// The allocation is shrunk to `len()` bytes first. Afterwards the Buffer
+    /// is reset with `initNull` and is still safe to `deinit`.
+    pub fn toOwnedSlice(self: &Buffer) -> []u8 {
+        // Copy the allocator out before `*self` is overwritten below.
+        const buf_allocator = self.list.allocator;
+        const content_len = self.len();
+        const owned_bytes = buf_allocator.shrink(u8, self.list.items, content_len);
+        *self = initNull(buf_allocator);
+        return owned_bytes;
+    }
+
+
     pub fn deinit(self: &Buffer) {
         self.list.deinit();
     }
test/compare_output.zig
@@ -346,7 +346,8 @@ pub fn addCases(cases: &tests.CompareOutputContext) {
         \\    %%io.stdout.printf("before\n");
         \\    defer %%io.stdout.printf("defer1\n");
         \\    defer %%io.stdout.printf("defer2\n");
-        \\    if (os.args.count() == 1) return;
+        \\    var args_it = @import("std").os.args();
+        \\    if (args_it.skip() and !args_it.skip()) return;
         \\    defer %%io.stdout.printf("defer3\n");
         \\    %%io.stdout.printf("after\n");
         \\}