Commit b109186dd5

Phil Schumann <metaleap@users.noreply.github.com>
2020-04-08 02:27:18
std/zig/parse_string_literal.zig: add hex+unicode escapes (#4678)
1 parent 66b2477
Changed files (1)
lib/std/zig/parse_string_literal.zig
@@ -19,17 +19,19 @@ pub fn parseStringLiteral(
     bytes: []const u8,
     bad_index: *usize, // populated if error.InvalidCharacter is returned
 ) ParseStringLiteralError![]u8 {
-    const first_index = if (bytes[0] == 'c') @as(usize, 2) else @as(usize, 1);
-    assert(bytes[bytes.len - 1] == '"');
+    assert(bytes.len >= 2 and bytes[0] == '"' and bytes[bytes.len - 1] == '"');
 
     var list = std.ArrayList(u8).init(allocator);
     errdefer list.deinit();
 
-    const slice = bytes[first_index..];
+    const slice = bytes[1..];
     try list.ensureCapacity(slice.len - 1);
 
     var state = State.Start;
-    for (slice) |b, index| {
+    var index: usize = 0;
+    while (index < slice.len) : (index += 1) {
+        const b = slice[index];
+
         switch (state) {
             State.Start => switch (b) {
                 '\\' => state = State.Backslash,
@@ -41,9 +43,6 @@ pub fn parseStringLiteral(
                 else => try list.append(b),
             },
             State.Backslash => switch (b) {
-                'x' => @panic("TODO"),
-                'u' => @panic("TODO"),
-                'U' => @panic("TODO"),
                 'n' => {
                     try list.append('\n');
                     state = State.Start;
@@ -60,10 +59,46 @@ pub fn parseStringLiteral(
                     try list.append('\t');
                     state = State.Start;
                 },
+                '\'' => {
+                    try list.append('\'');
+                    state = State.Start;
+                },
                 '"' => {
                     try list.append('"');
                     state = State.Start;
                 },
+                'x' => {
+                    // TODO: add more/better/broader tests for this.
+                    const index_continue = index + 3;
+                    if (slice.len >= index_continue)
+                        if (std.fmt.parseUnsigned(u8, slice[index + 1 .. index_continue], 16)) |char| {
+                            try list.append(char);
+                            state = State.Start;
+                            index = index_continue - 1; // loop-header increments again
+                            continue;
+                        } else |_| {};
+
+                    bad_index.* = index;
+                    return error.InvalidCharacter;
+                },
+                'u' => {
+                    // `\u{H..}` (1-6 hex digits): append the code point's UTF-8 encoding, not its raw integer bytes.
+                    if (slice.len > index + 2 and slice[index + 1] == '{')
+                        if (std.mem.indexOfScalarPos(u8, slice[0..std.math.min(index + 9, slice.len)], index + 3, '}')) |index_end| {
+                            const hex_str = slice[index + 2 .. index_end];
+                            if (std.fmt.parseUnsigned(u21, hex_str, 16)) |codepoint| {
+                                var utf8_buf: [4]u8 = undefined; // longest UTF-8 sequence is 4 bytes
+                                if (std.unicode.utf8Encode(codepoint, utf8_buf[0..])) |utf8_len| {
+                                    try list.appendSlice(utf8_buf[0..utf8_len]);
+                                    state = State.Start;
+                                    index = index_end; // loop-header increments
+                                    continue;
+                                } else |_| {} // encoder rejected the code point: report as invalid below
+                            } else |_| {} // not hex, or does not fit in u21: report as invalid below
+                        };
+                    bad_index.* = index;
+                    return error.InvalidCharacter;
+                },
                 else => {
                     bad_index.* = index;
                     return error.InvalidCharacter;
@@ -74,3 +109,17 @@ pub fn parseStringLiteral(
     }
     unreachable;
 }
+
+test "parseStringLiteral" {
+    const expect = std.testing.expect;
+    const eql = std.mem.eql;
+    // FixedBufferAllocator never reclaims earlier results, so size the buffer for all cases together.
+    var fixed_buf_mem: [256]u8 = undefined;
+    var fixed_buf_alloc = std.heap.FixedBufferAllocator.init(fixed_buf_mem[0..]);
+    var alloc = &fixed_buf_alloc.allocator;
+    var bad_index: usize = undefined;
+    // "\\x6f" hands the parser a real backslash escape; the last case passes compiler-decoded UTF-8 through.
+    expect(eql(u8, "foo", try parseStringLiteral(alloc, "\"foo\"", &bad_index)));
+    expect(eql(u8, "foo", try parseStringLiteral(alloc, "\"f\\x6f\\x6f\"", &bad_index)));
+    expect(eql(u8, "f💯", try parseStringLiteral(alloc, "\"f\u{1f4af}\"", &bad_index)));
+}