Commit `6aa3570cb0`

Ryan Liptak <squeek502@hotmail.com>

2025-10-11 08:04:40

windows: Make readLinkW APIs output WTF-16, reduce stack usage of callers

Affects the following functions:

- `std.fs.Dir.readLinkW`
- `std.os.windows.ReadLink`
- `std.os.windows.ntToWin32Namespace`
- `std.posix.readlinkW`
- `std.posix.readlinkatW`

Each of these functions (except `ntToWin32Namespace`) took WTF-16 as input and would output WTF-8, which makes optimal buffer re-use difficult at callsites and could force unnecessary WTF-16 <-> WTF-8 conversion during an intermediate step. The functions have been updated to output WTF-16, and also allow for the path and the output to re-use the same buffer (i.e. in-place modification), which can reduce the stack usage at callsites.

For example, all of `std.fs.Dir.readLink`/`readLinkZ`/`std.posix.readlink`/`readlinkZ`/`readlinkat`/`readlinkatZ` have had their stack usage reduced by one PathSpace struct (64 KiB) when targeting Windows.

The new `ntToWin32Namespace` takes an output buffer and returns a slice from that instead of returning a PathSpace, which is necessary to make the above possible.

master

1 parent 06a7597

Changed files (4)

lib

std

@@ -1354,8 +1354,14 @@ pub fn readLink(self: Dir, sub_path: []const u8, buffer: []u8) ReadLinkError![]u
         return self.readLinkWasi(sub_path, buffer);
     }
     if (native_os == .windows) {
-        const sub_path_w = try windows.sliceToPrefixedFileW(self.fd, sub_path);
-        return self.readLinkW(sub_path_w.span(), buffer);
+        var sub_path_w = try windows.sliceToPrefixedFileW(self.fd, sub_path);
+        const result_w = try self.readLinkW(sub_path_w.span(), &sub_path_w.data);
+
+        const len = std.unicode.calcWtf8Len(result_w);
+        if (len > buffer.len) return error.NameTooLong;
+
+        const end_index = std.unicode.wtf16LeToWtf8(buffer, result_w);
+        return buffer[0..end_index];
     }
     const sub_path_c = try posix.toPosixPath(sub_path);
     return self.readLinkZ(&sub_path_c, buffer);
@@ -1369,15 +1375,24 @@ pub fn readLinkWasi(self: Dir, sub_path: []const u8, buffer: []u8) ![]u8 {
 /// Same as `readLink`, except the `sub_path_c` parameter is null-terminated.
 pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: []u8) ![]u8 {
     if (native_os == .windows) {
-        const sub_path_w = try windows.cStrToPrefixedFileW(self.fd, sub_path_c);
-        return self.readLinkW(sub_path_w.span(), buffer);
+        var sub_path_w = try windows.cStrToPrefixedFileW(self.fd, sub_path_c);
+        const result_w = try self.readLinkW(sub_path_w.span(), &sub_path_w.data);
+
+        const len = std.unicode.calcWtf8Len(result_w);
+        if (len > buffer.len) return error.NameTooLong;
+
+        const end_index = std.unicode.wtf16LeToWtf8(buffer, result_w);
+        return buffer[0..end_index];
     }
     return posix.readlinkatZ(self.fd, sub_path_c, buffer);
 }
 
-/// Windows-only. Same as `readLink` except the pathname parameter
-/// is WTF16 LE encoded.
-pub fn readLinkW(self: Dir, sub_path_w: []const u16, buffer: []u8) ![]u8 {
+/// Windows-only. Same as `readLink` except the path parameter
+/// is WTF-16 LE encoded, NT-prefixed.
+///
+/// `sub_path_w` will never be accessed after `buffer` has been written to, so it
+/// is safe to reuse a single buffer for both.
+pub fn readLinkW(self: Dir, sub_path_w: []const u16, buffer: []u16) ![]u16 {
     return windows.ReadLink(self.fd, sub_path_w, buffer);
 }

@@ -193,10 +193,16 @@ test "Dir.readLink" {
             // test 1: symlink to a file
             try setupSymlink(ctx.dir, file_target_path, "symlink1", .{});
             try testReadLink(ctx.dir, canonical_file_target_path, "symlink1");
+            if (builtin.os.tag == .windows) {
+                try testReadLinkW(testing.allocator, ctx.dir, canonical_file_target_path, "symlink1");
+            }
 
             // test 2: symlink to a directory (can be different on Windows)
             try setupSymlink(ctx.dir, dir_target_path, "symlink2", .{ .is_directory = true });
             try testReadLink(ctx.dir, canonical_dir_target_path, "symlink2");
+            if (builtin.os.tag == .windows) {
+                try testReadLinkW(testing.allocator, ctx.dir, canonical_dir_target_path, "symlink2");
+            }
 
             // test 3: relative path symlink
             const parent_file = ".." ++ fs.path.sep_str ++ "target.txt";
@@ -205,6 +211,9 @@ test "Dir.readLink" {
             defer subdir.close();
             try setupSymlink(subdir, canonical_parent_file, "relative-link.txt", .{});
             try testReadLink(subdir, canonical_parent_file, "relative-link.txt");
+            if (builtin.os.tag == .windows) {
+                try testReadLinkW(testing.allocator, subdir, canonical_parent_file, "relative-link.txt");
+            }
         }
     }.impl);
 }
@@ -215,6 +224,17 @@ fn testReadLink(dir: Dir, target_path: []const u8, symlink_path: []const u8) !vo
     try testing.expectEqualStrings(target_path, actual);
 }
 
+fn testReadLinkW(allocator: mem.Allocator, dir: Dir, target_path: []const u8, symlink_path: []const u8) !void {
+    const target_path_w = try std.unicode.wtf8ToWtf16LeAlloc(allocator, target_path);
+    defer allocator.free(target_path_w);
+    // Calling the W functions directly requires the path to be NT-prefixed
+    const symlink_path_w = try std.os.windows.sliceToPrefixedFileW(dir.fd, symlink_path);
+    const wtf16_buffer = try allocator.alloc(u16, target_path_w.len);
+    defer allocator.free(wtf16_buffer);
+    const actual = try dir.readLinkW(symlink_path_w.span(), wtf16_buffer);
+    try testing.expectEqualSlices(u16, target_path_w, actual);
+}
+
 fn testReadLinkAbsolute(target_path: []const u8, symlink_path: []const u8) !void {
     var buffer: [fs.max_path_bytes]u8 = undefined;
     const given = try fs.readLinkAbsolute(symlink_path, buffer[0..]);

@@ -894,7 +894,9 @@ pub const ReadLinkError = error{
     UnsupportedReparsePointType,
 };
 
-pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLinkError![]u8 {
+/// `sub_path_w` will never be accessed after `out_buffer` has been written to, so it
+/// is safe to reuse a single buffer for both.
+pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u16) ReadLinkError![]u16 {
     const result_handle = OpenFile(sub_path_w, .{
         .access_mask = FILE_READ_ATTRIBUTES | SYNCHRONIZE,
         .dir = dir,
@@ -926,14 +928,14 @@ pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLin
             const len = buf.SubstituteNameLength >> 1;
             const path_buf = @as([*]const u16, &buf.PathBuffer);
             const is_relative = buf.Flags & SYMLINK_FLAG_RELATIVE != 0;
-            return parseReadlinkPath(path_buf[offset..][0..len], is_relative, out_buffer);
+            return parseReadLinkPath(path_buf[offset..][0..len], is_relative, out_buffer);
         },
         IO_REPARSE_TAG_MOUNT_POINT => {
             const buf: *const MOUNT_POINT_REPARSE_BUFFER = @ptrCast(@alignCast(&reparse_struct.DataBuffer[0]));
             const offset = buf.SubstituteNameOffset >> 1;
             const len = buf.SubstituteNameLength >> 1;
             const path_buf = @as([*]const u16, &buf.PathBuffer);
-            return parseReadlinkPath(path_buf[offset..][0..len], false, out_buffer);
+            return parseReadLinkPath(path_buf[offset..][0..len], false, out_buffer);
         },
         else => {
             return error.UnsupportedReparsePointType;
@@ -941,19 +943,18 @@ pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLin
     }
 }
 
-/// Asserts that there is enough space is `out_buffer`.
-/// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
-fn parseReadlinkPath(path: []const u16, is_relative: bool, out_buffer: []u8) []u8 {
-    const win32_namespace_path = path: {
-        if (is_relative) break :path path;
-        const win32_path = ntToWin32Namespace(path) catch |err| switch (err) {
-            error.NameTooLong => unreachable,
-            error.NotNtPath => break :path path,
+fn parseReadLinkPath(path: []const u16, is_relative: bool, out_buffer: []u16) error{NameTooLong}![]u16 {
+    path: {
+        if (is_relative) break :path;
+        return ntToWin32Namespace(path, out_buffer) catch |err| switch (err) {
+            error.NameTooLong => |e| return e,
+            error.NotNtPath => break :path,
         };
-        break :path win32_path.span();
-    };
-    const out_len = std.unicode.wtf16LeToWtf8(out_buffer, win32_namespace_path);
-    return out_buffer[0..out_len];
+    }
+    if (out_buffer.len < path.len) return error.NameTooLong;
+    const dest = out_buffer[0..path.len];
+    @memcpy(dest, path);
+    return dest;
 }
 
 pub const DeleteFileError = error{
@@ -2584,10 +2585,11 @@ test getUnprefixedPathType {
 /// https://github.com/reactos/reactos/blob/master/modules/rostests/apitests/ntdll/RtlNtPathNameToDosPathName.c
 ///
 /// `path` should be encoded as WTF-16LE.
-pub fn ntToWin32Namespace(path: []const u16) !PathSpace {
+///
+/// Supports in-place modification (`path` and `out` may refer to the same slice).
+pub fn ntToWin32Namespace(path: []const u16, out: []u16) error{ NameTooLong, NotNtPath }![]u16 {
     if (path.len > PATH_MAX_WIDE) return error.NameTooLong;
 
-    var path_space: PathSpace = undefined;
     const namespace_prefix = getNamespacePrefix(u16, path);
     switch (namespace_prefix) {
         .nt => {
@@ -2595,23 +2597,19 @@ pub fn ntToWin32Namespace(path: []const u16) !PathSpace {
             var after_prefix = path[4..]; // after the `\??\`
             // The prefix \??\UNC\ means this is a UNC path, in which case the
             // `\??\UNC\` should be replaced by `\\` (two backslashes)
-            // TODO: the "UNC" should technically be matched case-insensitively, but
-            //       it's unlikely to matter since most/all paths passed into this
-            //       function will have come from the OS meaning it should have
-            //       the 'canonical' uppercase UNC.
             const is_unc = after_prefix.len >= 4 and
-                std.mem.eql(u16, after_prefix[0..3], std.unicode.utf8ToUtf16LeStringLiteral("UNC")) and
+                eqlIgnoreCaseWTF16(after_prefix[0..3], std.unicode.utf8ToUtf16LeStringLiteral("UNC")) and
                 std.fs.path.PathType.windows.isSep(u16, std.mem.littleToNative(u16, after_prefix[3]));
+            const win32_len = path.len - @as(usize, if (is_unc) 6 else 4);
+            if (out.len < win32_len) return error.NameTooLong;
             if (is_unc) {
-                path_space.data[0] = comptime std.mem.nativeToLittle(u16, '\\');
+                out[0] = comptime std.mem.nativeToLittle(u16, '\\');
                 dest_index += 1;
                 // We want to include the last `\` of `\??\UNC\`
                 after_prefix = path[7..];
             }
-            @memcpy(path_space.data[dest_index..][0..after_prefix.len], after_prefix);
-            path_space.len = dest_index + after_prefix.len;
-            path_space.data[path_space.len] = 0;
-            return path_space;
+            @memmove(out[dest_index..][0..after_prefix.len], after_prefix);
+            return out[0..win32_len];
         },
         else => return error.NotNtPath,
     }
@@ -2620,25 +2618,14 @@ pub fn ntToWin32Namespace(path: []const u16) !PathSpace {
 test ntToWin32Namespace {
     const L = std.unicode.utf8ToUtf16LeStringLiteral;
 
-    try testNtToWin32Namespace(L("UNC"), L("\\??\\UNC"));
-    try testNtToWin32Namespace(L("\\\\"), L("\\??\\UNC\\"));
-    try testNtToWin32Namespace(L("\\\\path1"), L("\\??\\UNC\\path1"));
-    try testNtToWin32Namespace(L("\\\\path1\\path2"), L("\\??\\UNC\\path1\\path2"));
+    var mutable_unc_path_buf = L("\\??\\UNC\\path1\\path2").*;
+    try std.testing.expectEqualSlices(u16, L("\\\\path1\\path2"), try ntToWin32Namespace(&mutable_unc_path_buf, &mutable_unc_path_buf));
 
-    try testNtToWin32Namespace(L(""), L("\\??\\"));
-    try testNtToWin32Namespace(L("C:"), L("\\??\\C:"));
-    try testNtToWin32Namespace(L("C:\\"), L("\\??\\C:\\"));
-    try testNtToWin32Namespace(L("C:\\test"), L("\\??\\C:\\test"));
-    try testNtToWin32Namespace(L("C:\\test\\"), L("\\??\\C:\\test\\"));
-
-    try std.testing.expectError(error.NotNtPath, ntToWin32Namespace(L("foo")));
-    try std.testing.expectError(error.NotNtPath, ntToWin32Namespace(L("C:\\test")));
-    try std.testing.expectError(error.NotNtPath, ntToWin32Namespace(L("\\\\.\\test")));
-}
+    var mutable_path_buf = L("\\??\\C:\\test\\").*;
+    try std.testing.expectEqualSlices(u16, L("C:\\test\\"), try ntToWin32Namespace(&mutable_path_buf, &mutable_path_buf));
 
-fn testNtToWin32Namespace(expected: []const u16, path: []const u16) !void {
-    const converted = try ntToWin32Namespace(path);
-    try std.testing.expectEqualSlices(u16, expected, converted.span());
+    var too_small_buf: [6]u16 = undefined;
+    try std.testing.expectError(error.NameTooLong, ntToWin32Namespace(L("\\??\\C:\\test"), &too_small_buf));
 }
 
 fn getFullPathNameW(path: [*:0]const u16, out: []u16) !usize {

@@ -3021,26 +3021,42 @@ pub fn readlink(file_path: []const u8, out_buffer: []u8) ReadLinkError![]u8 {
     if (native_os == .wasi and !builtin.link_libc) {
         return readlinkat(AT.FDCWD, file_path, out_buffer);
     } else if (native_os == .windows) {
-        const file_path_w = try windows.sliceToPrefixedFileW(null, file_path);
-        return readlinkW(file_path_w.span(), out_buffer);
+        var file_path_w = try windows.sliceToPrefixedFileW(null, file_path);
+        const result_w = try readlinkW(file_path_w.span(), &file_path_w.data);
+
+        const len = std.unicode.calcWtf8Len(result_w);
+        if (len > out_buffer.len) return error.NameTooLong;
+
+        const end_index = std.unicode.wtf16LeToWtf8(out_buffer, result_w);
+        return out_buffer[0..end_index];
     } else {
         const file_path_c = try toPosixPath(file_path);
         return readlinkZ(&file_path_c, out_buffer);
     }
 }
 
-/// Windows-only. Same as `readlink` except `file_path` is WTF16 LE encoded.
-/// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
+/// Windows-only. Same as `readlink` except `file_path` is WTF-16 LE encoded, NT-prefixed.
+/// The result is encoded as WTF-16 LE.
+///
+/// `file_path` will never be accessed after `out_buffer` has been written to, so it
+/// is safe to reuse a single buffer for both.
+///
 /// See also `readlinkZ`.
-pub fn readlinkW(file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 {
+pub fn readlinkW(file_path: []const u16, out_buffer: []u16) ReadLinkError![]u16 {
     return windows.ReadLink(fs.cwd().fd, file_path, out_buffer);
 }
 
 /// Same as `readlink` except `file_path` is null-terminated.
 pub fn readlinkZ(file_path: [*:0]const u8, out_buffer: []u8) ReadLinkError![]u8 {
     if (native_os == .windows) {
-        const file_path_w = try windows.cStrToPrefixedFileW(null, file_path);
-        return readlinkW(file_path_w.span(), out_buffer);
+        var file_path_w = try windows.cStrToPrefixedFileW(null, file_path);
+        const result_w = try readlinkW(file_path_w.span(), &file_path_w.data);
+
+        const len = std.unicode.calcWtf8Len(result_w);
+        if (len > out_buffer.len) return error.NameTooLong;
+
+        const end_index = std.unicode.wtf16LeToWtf8(out_buffer, result_w);
+        return out_buffer[0..end_index];
     } else if (native_os == .wasi and !builtin.link_libc) {
         return readlink(mem.sliceTo(file_path, 0), out_buffer);
     }
@@ -3075,8 +3091,14 @@ pub fn readlinkat(dirfd: fd_t, file_path: []const u8, out_buffer: []u8) ReadLink
         return readlinkatWasi(dirfd, file_path, out_buffer);
     }
     if (native_os == .windows) {
-        const file_path_w = try windows.sliceToPrefixedFileW(dirfd, file_path);
-        return readlinkatW(dirfd, file_path_w.span(), out_buffer);
+        var file_path_w = try windows.sliceToPrefixedFileW(dirfd, file_path);
+        const result_w = try readlinkatW(dirfd, file_path_w.span(), &file_path_w.data);
+
+        const len = std.unicode.calcWtf8Len(result_w);
+        if (len > out_buffer.len) return error.NameTooLong;
+
+        const end_index = std.unicode.wtf16LeToWtf8(out_buffer, result_w);
+        return out_buffer[0..end_index];
     }
     const file_path_c = try toPosixPath(file_path);
     return readlinkatZ(dirfd, &file_path_c, out_buffer);
@@ -3103,10 +3125,14 @@ pub fn readlinkatWasi(dirfd: fd_t, file_path: []const u8, out_buffer: []u8) Read
     }
 }
 
-/// Windows-only. Same as `readlinkat` except `file_path` is null-terminated, WTF16 LE encoded.
-/// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
+/// Windows-only. Same as `readlinkat` except `file_path` is WTF-16 LE encoded, NT-prefixed.
+/// The result is encoded as WTF-16 LE.
+///
+/// `file_path` will never be accessed after `out_buffer` has been written to, so it
+/// is safe to reuse a single buffer for both.
+///
 /// See also `readlinkat`.
-pub fn readlinkatW(dirfd: fd_t, file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 {
+pub fn readlinkatW(dirfd: fd_t, file_path: []const u16, out_buffer: []u16) ReadLinkError![]u16 {
     return windows.ReadLink(dirfd, file_path, out_buffer);
 }
 
@@ -3114,8 +3140,14 @@ pub fn readlinkatW(dirfd: fd_t, file_path: []const u16, out_buffer: []u8) ReadLi
 /// See also `readlinkat`.
 pub fn readlinkatZ(dirfd: fd_t, file_path: [*:0]const u8, out_buffer: []u8) ReadLinkError![]u8 {
     if (native_os == .windows) {
-        const file_path_w = try windows.cStrToPrefixedFileW(dirfd, file_path);
-        return readlinkatW(dirfd, file_path_w.span(), out_buffer);
+        var file_path_w = try windows.cStrToPrefixedFileW(dirfd, file_path);
+        const result_w = try readlinkatW(dirfd, file_path_w.span(), &file_path_w.data);
+
+        const len = std.unicode.calcWtf8Len(result_w);
+        if (len > out_buffer.len) return error.NameTooLong;
+
+        const end_index = std.unicode.wtf16LeToWtf8(out_buffer, result_w);
+        return out_buffer[0..end_index];
     } else if (native_os == .wasi and !builtin.link_libc) {
         return readlinkat(dirfd, mem.sliceTo(file_path, 0), out_buffer);
     }

Commit 6aa3570cb0

Commit `6aa3570cb0`