Commit 08e5daa7d5

Jonathan Marler <johnnymarler@gmail.com>
2021-07-02 06:23:10
Add std.unicode.fmtUtf16le
1 parent ee173d5
Changed files (1)
lib
lib/std/unicode.zig
@@ -317,9 +317,9 @@ pub const Utf16LeIterator = struct {
         assert(it.i <= it.bytes.len);
         if (it.i == it.bytes.len) return null;
         const c0: u21 = mem.readIntLittle(u16, it.bytes[it.i..][0..2]);
+        it.i += 2;
         if (c0 & ~@as(u21, 0x03ff) == 0xd800) {
             // surrogate pair
-            it.i += 2;
             if (it.i >= it.bytes.len) return error.DanglingSurrogateHalf;
             const c1: u21 = mem.readIntLittle(u16, it.bytes[it.i..][0..2]);
             if (c1 & ~@as(u21, 0x03ff) != 0xdc00) return error.ExpectedSecondSurrogateHalf;
@@ -328,7 +328,6 @@ pub const Utf16LeIterator = struct {
         } else if (c0 & ~@as(u21, 0x03ff) == 0xdc00) {
             return error.UnexpectedSecondSurrogateHalf;
         } else {
-            it.i += 2;
             return c0;
         }
     }
@@ -769,6 +768,48 @@ fn calcUtf16LeLen(utf8: []const u8) usize {
     return dest_len;
 }
 
+/// Print the given `utf16le` string
+fn formatUtf16le(
+    utf16le: []const u16,
+    comptime fmt: []const u8,
+    options: std.fmt.FormatOptions,
+    writer: anytype,
+) !void {
+    const unknown_codepoint = 0xfffd;
+    _ = fmt;
+    _ = options;
+    var buf: [300]u8 = undefined; // just a random size I chose
+    var it = Utf16LeIterator.init(utf16le);
+    var u8len: usize = 0;
+    while (it.nextCodepoint() catch unknown_codepoint) |codepoint| {
+        u8len += utf8Encode(codepoint, buf[u8len..]) catch
+            utf8Encode(unknown_codepoint, buf[u8len..]) catch unreachable;
+        if (u8len + 3 >= buf.len) {
+            try writer.writeAll(buf[0..u8len]);
+            u8len = 0;
+        }
+    }
+    try writer.writeAll(buf[0..u8len]);
+}
+
+/// Return a Formatter for a Utf16le string
+pub fn fmtUtf16le(utf16le: []const u16) std.fmt.Formatter(formatUtf16le) {
+    return .{ .data = utf16le };
+}
+
+test "fmtUtf16le" {
+    const expectFmt = std.testing.expectFmt;
+    try expectFmt("", "{}", .{fmtUtf16le(utf8ToUtf16LeStringLiteral(""))});
+    try expectFmt("foo", "{}", .{fmtUtf16le(utf8ToUtf16LeStringLiteral("foo"))});
+    try expectFmt("𐐷", "{}", .{fmtUtf16le(utf8ToUtf16LeStringLiteral("𐐷"))});
+    try expectFmt("퟿", "{}", .{fmtUtf16le(&[_]u16{std.mem.readIntNative(u16, "\xff\xd7")})});
+    try expectFmt("�", "{}", .{fmtUtf16le(&[_]u16{std.mem.readIntNative(u16, "\x00\xd8")})});
+    try expectFmt("�", "{}", .{fmtUtf16le(&[_]u16{std.mem.readIntNative(u16, "\xff\xdb")})});
+    try expectFmt("�", "{}", .{fmtUtf16le(&[_]u16{std.mem.readIntNative(u16, "\x00\xdc")})});
+    try expectFmt("�", "{}", .{fmtUtf16le(&[_]u16{std.mem.readIntNative(u16, "\xff\xdf")})});
+    try expectFmt("", "{}", .{fmtUtf16le(&[_]u16{std.mem.readIntNative(u16, "\x00\xe0")})});
+}
+
 test "utf8ToUtf16LeStringLiteral" {
     {
         const bytes = [_:0]u16{