Commit cf38ce9701

Jared Miller <1668550+Jared-Miller@users.noreply.github.com>
2020-02-20 21:25:24
Implement UTF-8 to UTF-16LE literal conversion
1 parent 6ac76bc
Changed files (1)
lib
lib/std/unicode.zig
@@ -629,3 +629,71 @@ test "utf8ToUtf16LeWithNull" {
         testing.expect(utf16[2] == 0);
     }
 }
+
+/// Converts a UTF-8 string literal into a UTF-16LE string literal. 
+pub fn utf8ToUtf16LeStringLiteral(comptime utf8: []const u8) *const [calcUtf16LeLen(utf8) :0] u16 {
+    comptime {
+        const len: usize = calcUtf16LeLen(utf8);
+        var utf16le: [len :0]u16 = [_ :0]u16{0} ** len;
+        const utf16le_len = utf8ToUtf16Le(&utf16le, utf8[0..]) catch |err| @compileError(err);
+        assert(len == utf16le_len);
+        return &utf16le;
+    }
+}
+
+/// Returns length of a supplied UTF-8 string literal. Asserts that the data is valid UTF-8.
+fn calcUtf16LeLen(utf8: []const u8) usize {
+    var src_i: usize = 0;
+    var dest_len: usize = 0;
+    while (src_i < utf8.len) {
+        const n = utf8ByteSequenceLength(utf8[src_i]) catch unreachable;
+        const next_src_i = src_i + n;
+        const codepoint = utf8Decode(utf8[src_i..next_src_i]) catch unreachable;
+        if (codepoint < 0x10000) {
+            dest_len += 1;
+        } else {
+            dest_len += 2;
+        }
+        src_i = next_src_i;
+    }
+    return dest_len;
+}
+
+test "utf8ToUtf16LeStringLiteral" {
+{
+        const bytes = [_:0]u16{ 0x41 };
+        const utf16 = utf8ToUtf16LeStringLiteral("A");
+        testing.expectEqualSlices(u16, &bytes, utf16);
+        testing.expect(utf16[1] == 0);
+    }
+    {
+        const bytes = [_:0]u16{ 0xD801, 0xDC37 };
+        const utf16 = utf8ToUtf16LeStringLiteral("𐐷");
+        testing.expectEqualSlices(u16, &bytes, utf16);
+        testing.expect(utf16[2] == 0);
+    }
+    {
+        const bytes = [_:0]u16{ 0x02FF };
+        const utf16 = utf8ToUtf16LeStringLiteral("\u{02FF}");
+        testing.expectEqualSlices(u16, &bytes, utf16);
+        testing.expect(utf16[1] == 0);
+    }
+    {
+        const bytes = [_:0]u16{ 0x7FF };
+        const utf16 = utf8ToUtf16LeStringLiteral("\u{7FF}");
+        testing.expectEqualSlices(u16, &bytes, utf16);
+        testing.expect(utf16[1] == 0);
+    }
+    {
+        const bytes = [_:0]u16{ 0x801 };
+        const utf16 = utf8ToUtf16LeStringLiteral("\u{801}");
+        testing.expectEqualSlices(u16, &bytes, utf16);
+        testing.expect(utf16[1] == 0);
+    }
+    {
+        const bytes = [_:0]u16{ 0xDBFF, 0xDFFF };
+        const utf16 = utf8ToUtf16LeStringLiteral("\u{10FFFF}");
+        testing.expectEqualSlices(u16, &bytes, utf16);
+        testing.expect(utf16[2] == 0);
+    }
+}