Commit 96cbdd145d

Andrew Kelley <andrew@ziglang.org>
2025-07-22 05:00:45
std.fs.File.Reader: fix sendFile logic
it wasn't accounting for both writer and reader buffering
1 parent b35c55e
Changed files (3)
lib/std/fs/File.zig
@@ -1435,7 +1435,7 @@ pub const Reader = struct {
                     }
                     return 0;
                 };
-                const n = @min(size - pos, std.math.maxInt(i64), @intFromEnum(limit));
+                const n = @min(size - pos, maxInt(i64), @intFromEnum(limit));
                 file.seekBy(n) catch |err| {
                     r.seek_err = err;
                     return 0;
@@ -1726,18 +1726,123 @@ pub const Writer = struct {
         file_reader: *Reader,
         limit: std.io.Limit,
     ) std.io.Writer.FileError!usize {
+        const reader_buffered = file_reader.interface.buffered();
+        if (reader_buffered.len >= @intFromEnum(limit))
+            return sendFileBuffered(io_w, file_reader, reader_buffered);
+        const writer_buffered = io_w.buffered();
+        const file_limit = @intFromEnum(limit) - reader_buffered.len;
         const w: *Writer = @alignCast(@fieldParentPtr("interface", io_w));
         const out_fd = w.file.handle;
         const in_fd = file_reader.file.handle;
-        // TODO try using copy_file_range on FreeBSD
-        // TODO try using sendfile on macOS
-        // TODO try using sendfile on FreeBSD
+
+        if (native_os == .freebsd and w.mode == .streaming) sf: {
+            // Try using sendfile on FreeBSD.
+            if (w.sendfile_err != null) break :sf;
+            const offset = std.math.cast(std.c.off_t, file_reader.pos) orelse break :sf;
+            var hdtr_data: std.c.sf_hdtr = undefined;
+            var headers: [2]posix.iovec_const = undefined;
+            var headers_i: u8 = 0;
+            if (writer_buffered.len != 0) {
+                headers[headers_i] = .{ .base = writer_buffered.ptr, .len = writer_buffered.len };
+                headers_i += 1;
+            }
+            if (reader_buffered.len != 0) {
+                headers[headers_i] = .{ .base = reader_buffered.ptr, .len = reader_buffered.len };
+                headers_i += 1;
+            }
+            const hdtr: ?*std.c.sf_hdtr = if (headers_i == 0) null else b: {
+                hdtr_data = .{
+                    .headers = &headers,
+                    .hdr_cnt = headers_i,
+                    .trailers = null,
+                    .trl_cnt = 0,
+                };
+                break :b &hdtr_data;
+            };
+            var sbytes: std.c.off_t = undefined;
+            const nbytes: usize = @min(file_limit, maxInt(usize));
+            const flags = 0;
+            switch (posix.errno(std.c.sendfile(in_fd, out_fd, offset, nbytes, hdtr, &sbytes, flags))) {
+                .SUCCESS, .INTR => {},
+                .INVAL, .OPNOTSUPP, .NOTSOCK, .NOSYS => w.sendfile_err = error.UnsupportedOperation,
+                .BADF => if (builtin.mode == .Debug) @panic("race condition") else {
+                    w.sendfile_err = error.Unexpected;
+                },
+                .FAULT => if (builtin.mode == .Debug) @panic("segmentation fault") else {
+                    w.sendfile_err = error.Unexpected;
+                },
+                .NOTCONN => w.sendfile_err = error.BrokenPipe,
+                .AGAIN, .BUSY => if (sbytes == 0) {
+                    w.sendfile_err = error.WouldBlock;
+                },
+                .IO => w.sendfile_err = error.InputOutput,
+                .PIPE => w.sendfile_err = error.BrokenPipe,
+                .NOBUFS => w.sendfile_err = error.SystemResources,
+                else => |err| w.sendfile_err = posix.unexpectedErrno(err),
+            }
+            const consumed = io_w.consume(@bitCast(sbytes));
+            file_reader.seekTo(file_reader.pos + consumed) catch return error.ReadFailed;
+            return consumed;
+        }
+
+        if (native_os.isDarwin() and w.mode == .streaming) sf: {
+            // Try using sendfile on macOS.
+            if (w.sendfile_err != null) break :sf;
+            const offset = std.math.cast(std.c.off_t, file_reader.pos) orelse break :sf;
+            var hdtr_data: std.c.sf_hdtr = undefined;
+            var headers: [2]posix.iovec_const = undefined;
+            var headers_i: u8 = 0;
+            if (writer_buffered.len != 0) {
+                headers[headers_i] = .{ .base = writer_buffered.ptr, .len = writer_buffered.len };
+                headers_i += 1;
+            }
+            if (reader_buffered.len != 0) {
+                headers[headers_i] = .{ .base = reader_buffered.ptr, .len = reader_buffered.len };
+                headers_i += 1;
+            }
+            const hdtr: ?*std.c.sf_hdtr = if (headers_i == 0) null else b: {
+                hdtr_data = .{
+                    .headers = &headers,
+                    .hdr_cnt = headers_i,
+                    .trailers = null,
+                    .trl_cnt = 0,
+                };
+                break :b &hdtr_data;
+            };
+            const max_count = maxInt(i32); // Avoid EINVAL.
+            var sbytes: std.c.off_t = @min(file_limit, max_count);
+            const flags = 0;
+            switch (posix.errno(std.c.sendfile(in_fd, out_fd, offset, &sbytes, hdtr, flags))) {
+                .SUCCESS, .INTR => {},
+                .OPNOTSUPP, .NOTSOCK, .NOSYS => w.sendfile_err = error.UnsupportedOperation,
+                .BADF => if (builtin.mode == .Debug) @panic("race condition") else {
+                    w.sendfile_err = error.Unexpected;
+                },
+                .FAULT => if (builtin.mode == .Debug) @panic("segmentation fault") else {
+                    w.sendfile_err = error.Unexpected;
+                },
+                .INVAL => if (builtin.mode == .Debug) @panic("invalid API usage") else {
+                    w.sendfile_err = error.Unexpected;
+                },
+                .NOTCONN => w.sendfile_err = error.BrokenPipe,
+                .AGAIN => if (sbytes == 0) {
+                    w.sendfile_err = error.WouldBlock;
+                },
+                .IO => w.sendfile_err = error.InputOutput,
+                .PIPE => w.sendfile_err = error.BrokenPipe,
+                else => |err| w.sendfile_err = posix.unexpectedErrno(err),
+            }
+            const consumed = io_w.consume(@bitCast(sbytes));
+            file_reader.seekTo(file_reader.pos + consumed) catch return error.ReadFailed;
+            return consumed;
+        }
+
         if (native_os == .linux and w.mode == .streaming) sf: {
             // Try using sendfile on Linux.
             if (w.sendfile_err != null) break :sf;
             // Linux sendfile does not support headers.
-            const buffered = limit.slice(file_reader.interface.buffer);
-            if (io_w.end != 0 or buffered.len != 0) return drain(io_w, &.{buffered}, 1);
+            if (writer_buffered.len != 0 or reader_buffered.len != 0)
+                return sendFileBuffered(io_w, file_reader, reader_buffered);
             const max_count = 0x7ffff000; // Avoid EINVAL.
             var off: std.os.linux.off_t = undefined;
             const off_ptr: ?*std.os.linux.off_t, const count: usize = switch (file_reader.mode) {
@@ -1784,6 +1889,7 @@ pub const Writer = struct {
             w.pos += n;
             return n;
         }
+
         const copy_file_range = switch (native_os) {
             .freebsd => std.os.freebsd.copy_file_range,
             .linux => if (std.c.versionCheck(.{ .major = 2, .minor = 27, .patch = 0 })) std.os.linux.wrapped.copy_file_range else {},
@@ -1791,8 +1897,8 @@ pub const Writer = struct {
         };
         if (@TypeOf(copy_file_range) != void) cfr: {
             if (w.copy_file_range_err != null) break :cfr;
-            const buffered = limit.slice(file_reader.interface.buffer);
-            if (io_w.end != 0 or buffered.len != 0) return drain(io_w, &.{buffered}, 1);
+            if (writer_buffered.len != 0 or reader_buffered.len != 0)
+                return sendFileBuffered(io_w, file_reader, reader_buffered);
             var off_in: i64 = undefined;
             var off_out: i64 = undefined;
             const off_in_ptr: ?*i64 = switch (file_reader.mode) {
@@ -1832,6 +1938,8 @@ pub const Writer = struct {
             if (w.pos != 0) break :fcf;
             if (limit != .unlimited) break :fcf;
             const size = file_reader.getSize() catch break :fcf;
+            if (writer_buffered.len != 0 or reader_buffered.len != 0)
+                return sendFileBuffered(io_w, file_reader, reader_buffered);
             const rc = std.c.fcopyfile(in_fd, out_fd, null, .{ .DATA = true });
             switch (posix.errno(rc)) {
                 .SUCCESS => {},
@@ -1860,6 +1968,16 @@ pub const Writer = struct {
         return error.Unimplemented;
     }
 
+    fn sendFileBuffered(
+        io_w: *std.io.Writer,
+        file_reader: *Reader,
+        reader_buffered: []const u8,
+    ) std.io.Writer.FileError!usize {
+        const n = try drain(io_w, &.{reader_buffered}, 1);
+        file_reader.seekTo(file_reader.pos + n) catch return error.ReadFailed;
+        return n;
+    }
+
     pub fn seekTo(w: *Writer, offset: u64) SeekError!void {
         switch (w.mode) {
             .positional, .positional_reading => {
lib/std/c.zig
@@ -10497,9 +10497,9 @@ pub const sysconf = switch (native_os) {
 
 pub const sf_hdtr = switch (native_os) {
     .freebsd, .macos, .ios, .tvos, .watchos, .visionos => extern struct {
-        headers: [*]const iovec_const,
+        headers: ?[*]const iovec_const,
         hdr_cnt: c_int,
-        trailers: [*]const iovec_const,
+        trailers: ?[*]const iovec_const,
         trl_cnt: c_int,
     },
     else => void,
lib/std/posix.zig
@@ -6326,295 +6326,6 @@ pub fn send(
     };
 }
 
-pub const SendFileError = PReadError || WriteError || SendError;
-
-/// Transfer data between file descriptors, with optional headers and trailers.
-///
-/// Returns the number of bytes written, which can be zero.
-///
-/// The `sendfile` call copies `in_len` bytes from one file descriptor to another. When possible,
-/// this is done within the operating system kernel, which can provide better performance
-/// characteristics than transferring data from kernel to user space and back, such as with
-/// `read` and `write` calls. When `in_len` is `0`, it means to copy until the end of the input file has been
-/// reached. Note, however, that partial writes are still possible in this case.
-///
-/// `in_fd` must be a file descriptor opened for reading, and `out_fd` must be a file descriptor
-/// opened for writing. They may be any kind of file descriptor; however, if `in_fd` is not a regular
-/// file system file, it may cause this function to fall back to calling `read` and `write`, in which case
-/// atomicity guarantees no longer apply.
-///
-/// Copying begins reading at `in_offset`. The input file descriptor seek position is ignored and not updated.
-/// If the output file descriptor has a seek position, it is updated as bytes are written. When
-/// `in_offset` is past the end of the input file, it successfully reads 0 bytes.
-///
-/// `flags` has different meanings per operating system; refer to the respective man pages.
-///
-/// These systems support atomically sending everything, including headers and trailers:
-/// * macOS
-/// * FreeBSD
-///
-/// These systems support in-kernel data copying, but headers and trailers are not sent atomically:
-/// * Linux
-///
-/// Other systems fall back to calling `read` / `write`.
-///
-/// Linux has a limit on how many bytes may be transferred in one `sendfile` call, which is `0x7ffff000`
-/// on both 64-bit and 32-bit systems. This is due to using a signed C int as the return value, as
-/// well as stuffing the errno codes into the last `4096` values. This is noted on the `sendfile` man page.
-/// The limit on Darwin is `0x7fffffff`, trying to write more than that returns EINVAL.
-/// The corresponding POSIX limit on this is `maxInt(isize)`.
-pub fn sendfile(
-    out_fd: fd_t,
-    in_fd: fd_t,
-    in_offset: u64,
-    in_len: u64,
-    headers: []const iovec_const,
-    trailers: []const iovec_const,
-    flags: u32,
-) SendFileError!usize {
-    var header_done = false;
-    var total_written: usize = 0;
-
-    // Prevents EOVERFLOW.
-    const size_t = std.meta.Int(.unsigned, @typeInfo(usize).int.bits - 1);
-    const max_count = switch (native_os) {
-        .linux => 0x7ffff000,
-        .macos, .ios, .watchos, .tvos, .visionos => maxInt(i32),
-        else => maxInt(size_t),
-    };
-
-    switch (native_os) {
-        .linux => sf: {
-            if (headers.len != 0) {
-                const amt = try writev(out_fd, headers);
-                total_written += amt;
-                if (amt < count_iovec_bytes(headers)) return total_written;
-                header_done = true;
-            }
-
-            // Here we match BSD behavior, making a zero count value send as many bytes as possible.
-            const adjusted_count = if (in_len == 0) max_count else @min(in_len, max_count);
-
-            const sendfile_sym = if (lfs64_abi) system.sendfile64 else system.sendfile;
-            while (true) {
-                var offset: off_t = @bitCast(in_offset);
-                const rc = sendfile_sym(out_fd, in_fd, &offset, adjusted_count);
-                switch (errno(rc)) {
-                    .SUCCESS => {
-                        const amt: usize = @bitCast(rc);
-                        total_written += amt;
-                        if (in_len == 0 and amt == 0) {
-                            // We have detected EOF from `in_fd`.
-                            break;
-                        } else if (amt < in_len) {
-                            return total_written;
-                        } else {
-                            break;
-                        }
-                    },
-
-                    .BADF => unreachable, // Always a race condition.
-                    .FAULT => unreachable, // Segmentation fault.
-                    .OVERFLOW => unreachable, // We avoid passing too large of a `count`.
-                    .NOTCONN => return error.BrokenPipe, // `out_fd` is an unconnected socket
-
-                    .INVAL => {
-                        // EINVAL could be any of the following situations:
-                        // * Descriptor is not valid or locked
-                        // * an mmap(2)-like operation is  not  available  for in_fd
-                        // * count is negative
-                        // * out_fd has the APPEND flag set
-                        // Because of the "mmap(2)-like operation" possibility, we fall back to doing read/write
-                        // manually.
-                        break :sf;
-                    },
-                    .AGAIN => return error.WouldBlock,
-                    .IO => return error.InputOutput,
-                    .PIPE => return error.BrokenPipe,
-                    .NOMEM => return error.SystemResources,
-                    .NXIO => return error.Unseekable,
-                    .SPIPE => return error.Unseekable,
-                    else => |err| {
-                        unexpectedErrno(err) catch {};
-                        break :sf;
-                    },
-                }
-            }
-
-            if (trailers.len != 0) {
-                total_written += try writev(out_fd, trailers);
-            }
-
-            return total_written;
-        },
-        .freebsd => sf: {
-            var hdtr_data: std.c.sf_hdtr = undefined;
-            var hdtr: ?*std.c.sf_hdtr = null;
-            if (headers.len != 0 or trailers.len != 0) {
-                // Here we carefully avoid `@intCast` by returning partial writes when
-                // too many io vectors are provided.
-                const hdr_cnt = cast(u31, headers.len) orelse maxInt(u31);
-                if (headers.len > hdr_cnt) return writev(out_fd, headers);
-
-                const trl_cnt = cast(u31, trailers.len) orelse maxInt(u31);
-
-                hdtr_data = std.c.sf_hdtr{
-                    .headers = headers.ptr,
-                    .hdr_cnt = hdr_cnt,
-                    .trailers = trailers.ptr,
-                    .trl_cnt = trl_cnt,
-                };
-                hdtr = &hdtr_data;
-            }
-
-            while (true) {
-                var sbytes: off_t = undefined;
-                const err = errno(system.sendfile(in_fd, out_fd, @bitCast(in_offset), @min(in_len, max_count), hdtr, &sbytes, flags));
-                const amt: usize = @bitCast(sbytes);
-                switch (err) {
-                    .SUCCESS => return amt,
-
-                    .BADF => unreachable, // Always a race condition.
-                    .FAULT => unreachable, // Segmentation fault.
-                    .NOTCONN => return error.BrokenPipe, // `out_fd` is an unconnected socket
-
-                    .INVAL, .OPNOTSUPP, .NOTSOCK, .NOSYS => {
-                        // EINVAL could be any of the following situations:
-                        // * The fd argument is not a regular file.
-                        // * The s argument is not a SOCK.STREAM type socket.
-                        // * The offset argument is negative.
-                        // Because of some of these possibilities, we fall back to doing read/write
-                        // manually, the same as ENOSYS.
-                        break :sf;
-                    },
-
-                    .INTR => if (amt != 0) return amt else continue,
-
-                    .AGAIN => if (amt != 0) {
-                        return amt;
-                    } else {
-                        return error.WouldBlock;
-                    },
-
-                    .BUSY => if (amt != 0) {
-                        return amt;
-                    } else {
-                        return error.WouldBlock;
-                    },
-
-                    .IO => return error.InputOutput,
-                    .NOBUFS => return error.SystemResources,
-                    .PIPE => return error.BrokenPipe,
-
-                    else => {
-                        unexpectedErrno(err) catch {};
-                        if (amt != 0) {
-                            return amt;
-                        } else {
-                            break :sf;
-                        }
-                    },
-                }
-            }
-        },
-        .macos, .ios, .tvos, .watchos, .visionos => sf: {
-            var hdtr_data: std.c.sf_hdtr = undefined;
-            var hdtr: ?*std.c.sf_hdtr = null;
-            if (headers.len != 0 or trailers.len != 0) {
-                // Here we carefully avoid `@intCast` by returning partial writes when
-                // too many io vectors are provided.
-                const hdr_cnt = cast(u31, headers.len) orelse maxInt(u31);
-                if (headers.len > hdr_cnt) return writev(out_fd, headers);
-
-                const trl_cnt = cast(u31, trailers.len) orelse maxInt(u31);
-
-                hdtr_data = std.c.sf_hdtr{
-                    .headers = headers.ptr,
-                    .hdr_cnt = hdr_cnt,
-                    .trailers = trailers.ptr,
-                    .trl_cnt = trl_cnt,
-                };
-                hdtr = &hdtr_data;
-            }
-
-            while (true) {
-                var sbytes: off_t = @min(in_len, max_count);
-                const err = errno(system.sendfile(in_fd, out_fd, @bitCast(in_offset), &sbytes, hdtr, flags));
-                const amt: usize = @bitCast(sbytes);
-                switch (err) {
-                    .SUCCESS => return amt,
-
-                    .BADF => unreachable, // Always a race condition.
-                    .FAULT => unreachable, // Segmentation fault.
-                    .INVAL => unreachable,
-                    .NOTCONN => return error.BrokenPipe, // `out_fd` is an unconnected socket
-
-                    .OPNOTSUPP, .NOTSOCK, .NOSYS => break :sf,
-
-                    .INTR => if (amt != 0) return amt else continue,
-
-                    .AGAIN => if (amt != 0) {
-                        return amt;
-                    } else {
-                        return error.WouldBlock;
-                    },
-
-                    .IO => return error.InputOutput,
-                    .PIPE => return error.BrokenPipe,
-
-                    else => {
-                        unexpectedErrno(err) catch {};
-                        if (amt != 0) {
-                            return amt;
-                        } else {
-                            break :sf;
-                        }
-                    },
-                }
-            }
-        },
-        else => {}, // fall back to read/write
-    }
-
-    if (headers.len != 0 and !header_done) {
-        const amt = try writev(out_fd, headers);
-        total_written += amt;
-        if (amt < count_iovec_bytes(headers)) return total_written;
-    }
-
-    rw: {
-        var buf: [8 * 4096]u8 = undefined;
-        // Here we match BSD behavior, making a zero count value send as many bytes as possible.
-        const adjusted_count = if (in_len == 0) buf.len else @min(buf.len, in_len);
-        const amt_read = try pread(in_fd, buf[0..adjusted_count], in_offset);
-        if (amt_read == 0) {
-            if (in_len == 0) {
-                // We have detected EOF from `in_fd`.
-                break :rw;
-            } else {
-                return total_written;
-            }
-        }
-        const amt_written = try write(out_fd, buf[0..amt_read]);
-        total_written += amt_written;
-        if (amt_written < in_len or in_len == 0) return total_written;
-    }
-
-    if (trailers.len != 0) {
-        total_written += try writev(out_fd, trailers);
-    }
-
-    return total_written;
-}
-
-fn count_iovec_bytes(iovs: []const iovec_const) usize {
-    var count: usize = 0;
-    for (iovs) |iov| {
-        count += iov.len;
-    }
-    return count;
-}
-
 pub const CopyFileRangeError = error{
     FileTooBig,
     InputOutput,