Commit 279607cae5
Changed files (3)
lib
lib/std/io/writer.zig
@@ -45,6 +45,13 @@ pub fn Writer(
}
}
+ /// Writes the complete `bytes` slice to the stream `n` times in a row.
+ /// Nothing is written when `n` is zero; the first `writeAll` error is returned as-is.
+ pub fn writeBytesNTimes(self: Self, bytes: []const u8, n: usize) Error!void {
+ for (0..n) |_| {
+ try self.writeAll(bytes);
+ }
+ }
+
pub inline fn writeInt(self: Self, comptime T: type, value: T, endian: std.builtin.Endian) Error!void {
var bytes: [@divExact(@typeInfo(T).Int.bits, 8)]u8 = undefined;
mem.writeInt(std.math.ByteAlignedInt(@TypeOf(value)), &bytes, value, endian);
lib/std/fmt.zig
@@ -23,7 +23,7 @@ pub const FormatOptions = struct {
precision: ?usize = null,
width: ?usize = null,
alignment: Alignment = .right,
- fill: u8 = ' ',
+ fill: u21 = ' ',
};
/// Renders fmt string with args, calling `writer` with slices of bytes.
@@ -211,14 +211,18 @@ fn cacheString(str: anytype) []const u8 {
pub const Placeholder = struct {
specifier_arg: []const u8,
- fill: u8,
+ fill: u21,
alignment: Alignment,
arg: Specifier,
width: Specifier,
precision: Specifier,
pub fn parse(comptime str: anytype) Placeholder {
- comptime var parser = Parser{ .buf = &str };
+ const view = std.unicode.Utf8View.initComptime(&str);
+ comptime var parser = Parser{
+ .buf = &str,
+ .iter = view.iterator(),
+ };
// Parse the positional argument number
const arg = comptime parser.specifier() catch |err|
@@ -230,7 +234,7 @@ pub const Placeholder = struct {
// Skip the colon, if present
if (comptime parser.char()) |ch| {
if (ch != ':') {
- @compileError("expected : or }, found '" ++ [1]u8{ch} ++ "'");
+ @compileError("expected : or }, found '" ++ unicode.utf8EncodeComptime(ch) ++ "'");
}
}
@@ -265,7 +269,7 @@ pub const Placeholder = struct {
// Skip the dot, if present
if (comptime parser.char()) |ch| {
if (ch != '.') {
- @compileError("expected . or }, found '" ++ [1]u8{ch} ++ "'");
+ @compileError("expected . or }, found '" ++ unicode.utf8EncodeComptime(ch) ++ "'");
}
}
@@ -274,7 +278,7 @@ pub const Placeholder = struct {
@compileError(@errorName(err));
if (comptime parser.char()) |ch| {
- @compileError("extraneous trailing character '" ++ [1]u8{ch} ++ "'");
+ @compileError("extraneous trailing character '" ++ unicode.utf8EncodeComptime(ch) ++ "'");
}
return Placeholder{
@@ -297,21 +301,23 @@ pub const Specifier = union(enum) {
pub const Parser = struct {
buf: []const u8,
pos: usize = 0,
+ iter: std.unicode.Utf8Iterator = undefined,
// Returns a decimal number or null if the current character is not a
// digit
pub fn number(self: *@This()) ?usize {
var r: ?usize = null;
- while (self.pos < self.buf.len) : (self.pos += 1) {
- switch (self.buf[self.pos]) {
+ while (self.peek(0)) |code_point| {
+ switch (code_point) {
'0'...'9' => {
if (r == null) r = 0;
r.? *= 10;
- r.? += self.buf[self.pos] - '0';
+ r.? += code_point - '0';
},
else => break,
}
+ _ = self.iter.nextCodepoint();
}
return r;
@@ -319,31 +325,27 @@ pub const Parser = struct {
// Returns a substring of the input starting from the current position
// and ending where `ch` is found or until the end if not found
- pub fn until(self: *@This(), ch: u8) []const u8 {
- const start = self.pos;
-
- if (start >= self.buf.len)
- return &[_]u8{};
-
- while (self.pos < self.buf.len) : (self.pos += 1) {
- if (self.buf[self.pos] == ch) break;
+ pub fn until(self: *@This(), ch: u21) []const u8 {
+ // Remember the byte offset where scanning starts so the result can be a
+ // view into the iterator's input instead of a copy assembled with `++`
+ // (slice concatenation is only possible during comptime evaluation).
+ const start = self.iter.i;
+ while (self.peek(0)) |code_point| {
+ if (code_point == ch)
+ break;
+ // Consume the code point that was just inspected.
+ _ = self.iter.nextCodepoint();
}
- return self.buf[start..self.pos];
+ // Empty when already at the end of input or when `ch` is the next code point.
+ return self.iter.bytes[start..self.iter.i];
}
// Returns one character, if available
- pub fn char(self: *@This()) ?u8 {
- if (self.pos < self.buf.len) {
- const ch = self.buf[self.pos];
- self.pos += 1;
- return ch;
- }
- return null;
+ pub fn char(self: *@This()) ?u21 {
+ // The iterator yields null once the input is exhausted, which is exactly
+ // this function's contract, so its result is forwarded unchanged.
+ return self.iter.nextCodepoint();
}
- pub fn maybe(self: *@This(), val: u8) bool {
- if (self.pos < self.buf.len and self.buf[self.pos] == val) {
- self.pos += 1;
+ pub fn maybe(self: *@This(), val: u21) bool {
+ if (self.peek(0) == val) {
+ _ = self.iter.nextCodepoint();
return true;
}
return false;
@@ -367,8 +369,17 @@ pub const Parser = struct {
}
// Returns the n-th next character or null if that's past the end
- pub fn peek(self: *@This(), n: usize) ?u8 {
- return if (self.pos + n < self.buf.len) self.buf[self.pos + n] else null;
+ pub fn peek(self: *@This(), n: usize) ?u21 {
+ // Look-ahead must not consume input: remember the iterator's byte offset
+ // and restore it on every exit path.
+ const original_i = self.iter.i;
+ defer self.iter.i = original_i;
+
+ // Decode n + 1 code points; the last one decoded is the one requested.
+ // The counter carries an explicit integer type because an untyped
+ // `var i = 0` is a `comptime_int`, which is rejected outside of comptime.
+ var i: usize = 0;
+ var code_point: ?u21 = null;
+ while (i <= n) : (i += 1) {
+ code_point = self.iter.nextCodepoint();
+ // Ran out of input before reaching the n-th code point.
+ if (code_point == null) return null;
+ }
+ return code_point;
}
};
@@ -965,8 +976,7 @@ pub fn formatUnicodeCodepoint(
var buf: [4]u8 = undefined;
const len = unicode.utf8Encode(c, &buf) catch |err| switch (err) {
error.Utf8CannotEncodeSurrogateHalf, error.CodepointTooLarge => {
- const len = unicode.utf8Encode(unicode.replacement_character, &buf) catch unreachable;
- return formatBuf(buf[0..len], options, writer);
+ return formatBuf(&unicode.utf8EncodeComptime(unicode.replacement_character), options, writer);
},
};
return formatBuf(buf[0..len], options, writer);
@@ -985,20 +995,28 @@ pub fn formatBuf(
if (padding == 0)
return writer.writeAll(buf);
+ var fill_buffer: [4]u8 = undefined;
+ const fill_utf8 = if (unicode.utf8Encode(options.fill, &fill_buffer)) |len|
+ fill_buffer[0..len]
+ else |err| switch (err) {
+ error.Utf8CannotEncodeSurrogateHalf,
+ error.CodepointTooLarge,
+ => &unicode.utf8EncodeComptime(unicode.replacement_character),
+ };
switch (options.alignment) {
.left => {
try writer.writeAll(buf);
- try writer.writeByteNTimes(options.fill, padding);
+ try writer.writeBytesNTimes(fill_utf8, padding);
},
.center => {
const left_padding = padding / 2;
const right_padding = (padding + 1) / 2;
- try writer.writeByteNTimes(options.fill, left_padding);
+ try writer.writeBytesNTimes(fill_utf8, left_padding);
try writer.writeAll(buf);
- try writer.writeByteNTimes(options.fill, right_padding);
+ try writer.writeBytesNTimes(fill_utf8, right_padding);
},
.right => {
- try writer.writeByteNTimes(options.fill, padding);
+ try writer.writeBytesNTimes(fill_utf8, padding);
try writer.writeAll(buf);
},
}
@@ -2793,6 +2811,15 @@ test "padding" {
try expectFmt("a====", "{c:=<5}", .{'a'});
}
+test "padding fill char utf" { // the fill character '─' is not ASCII, so each fill is a multi-byte UTF-8 sequence
+ try expectFmt("──crêpe───", "{s:─^10}", .{"crêpe"}); // centered string: two fills on the left, three on the right
+ try expectFmt("─────crêpe", "{s:─>10}", .{"crêpe"}); // right-aligned string: all fill precedes the text
+ try expectFmt("crêpe─────", "{s:─<10}", .{"crêpe"}); // left-aligned string: all fill follows the text
+ try expectFmt("────a", "{c:─>5}", .{'a'}); // right-aligned single character
+ try expectFmt("──a──", "{c:─^5}", .{'a'}); // centered single character, fill split evenly
+ try expectFmt("a────", "{c:─<5}", .{'a'}); // left-aligned single character
+}
+
test "decimal float padding" {
const number: f32 = 3.1415;
try expectFmt("left-pad: **3.141\n", "left-pad: {d:*>7.3}\n", .{number});
lib/std/unicode.zig
@@ -69,6 +69,19 @@ pub fn utf8Encode(c: u21, out: []u8) !u3 {
return length;
}
+/// Encodes the comptime-known code point `c` as UTF-8 and returns the bytes as an
+/// array whose length equals the length of the encoded sequence.
+/// A code point that cannot be encoded is reported as a compile error.
+pub inline fn utf8EncodeComptime(comptime c: u21) [
+ utf8CodepointSequenceLength(c) catch |err|
+ @compileError(@errorName(err))
+]u8 {
+ return comptime encoded: {
+ // The return type above has already rejected code points without a valid length.
+ const len = utf8CodepointSequenceLength(c) catch unreachable;
+ var bytes: [len]u8 = undefined;
+ const written = utf8Encode(c, &bytes) catch |err|
+ @compileError(@errorName(err));
+ // The encoder must fill the array exactly.
+ assert(written == bytes.len);
+ break :encoded bytes;
+ };
+}
+
const Utf8DecodeError = Utf8Decode2Error || Utf8Decode3Error || Utf8Decode4Error;
/// Decodes the UTF-8 codepoint encoded in the given slice of bytes.
@@ -525,6 +538,13 @@ fn testUtf8Encode() !void {
try testing.expect(array[3] == 0b10001000);
}
+test "utf8 encode comptime" { // one case per UTF-8 sequence length, compared against the string literal's own bytes
+ try testing.expectEqualSlices(u8, "€", &utf8EncodeComptime('€')); // U+20AC: three-byte sequence
+ try testing.expectEqualSlices(u8, "$", &utf8EncodeComptime('$')); // U+0024: single ASCII byte
+ try testing.expectEqualSlices(u8, "¢", &utf8EncodeComptime('¢')); // U+00A2: two-byte sequence
+ try testing.expectEqualSlices(u8, "𐍈", &utf8EncodeComptime('𐍈')); // U+10348: four-byte sequence
+}
+
test "utf8 encode error" {
try comptime testUtf8EncodeError();
try testUtf8EncodeError();