Commit fd10baf748

Motiejus Jakštys <motiejus@jakstys.lt>
2022-08-19 13:53:00
os.copy_file_range: save a syscall for most operations
Currenty copy_file_range always uses at least two syscalls: 1. As many as it needs to do the initial copy (always 1 during my testing) 2. The last one is always when offset is the size of the file. The second syscall is used to detect the terminating condition. However, because we do a stat for other reasons, we know the size of the file, and we can skip the syscall. Sparse files: since copy_file_range expands holes of sparse files, I conclude that this layer was not intended to work with sparse files. In other words, this commit does not make it worse for sparse file society. Test program ------------ const std = @import("std"); pub fn main() !void { const arg1 = std.mem.span(std.os.argv[1]); const arg2 = std.mem.span(std.os.argv[2]); try std.fs.cwd().copyFile(arg1, std.fs.cwd(), arg2, .{}); } Test output (current master) ---------------------------- Observe two `copy_file_range` syscalls: one with 209 bytes, one with zero: $ zig build-exe cp.zig $ strace ./cp ./cp.zig ./cp2.zig |& grep copy_file_range copy_file_range(3, [0], 5, [0], 4294967295, 0) = 209 copy_file_range(3, [209], 5, [209], 4294967295, 0) = 0 $ Test output (this diff) ----------------------- Observe a single `copy_file_range` syscall with 209 bytes: $ /code/zig/build/zig build-exe cp.zig $ strace ./cp ./cp.zig ./cp2.zig |& grep copy_file_range copy_file_range(3, [0], 5, [0], 4294967295, 0) = 209 $
1 parent 5ec76cf
Changed files (1)
lib
std
lib/std/fs.zig
@@ -2562,7 +2562,7 @@ pub const Dir = struct {
         var atomic_file = try dest_dir.atomicFile(dest_path, .{ .mode = mode });
         defer atomic_file.deinit();
 
-        try copy_file(in_file.handle, atomic_file.file.handle);
+        try copy_file(in_file.handle, atomic_file.file.handle, size);
         try atomic_file.finish();
     }
 
@@ -3041,7 +3041,7 @@ const CopyFileRawError = error{SystemResources} || os.CopyFileRangeError || os.S
 // Transfer all the data between two file descriptors in the most efficient way.
 // The copy starts at offset 0, the initial offsets are preserved.
 // No metadata is transferred over.
-fn copy_file(fd_in: os.fd_t, fd_out: os.fd_t) CopyFileRawError!void {
+fn copy_file(fd_in: os.fd_t, fd_out: os.fd_t, maybe_size: ?u64) CopyFileRawError!void {
     if (comptime builtin.target.isDarwin()) {
         const rc = os.system.fcopyfile(fd_in, fd_out, null, os.system.COPYFILE_DATA);
         switch (os.errno(rc)) {
@@ -3064,7 +3064,10 @@ fn copy_file(fd_in: os.fd_t, fd_out: os.fd_t) CopyFileRawError!void {
             // a 32 bit value so that the syscall won't return EINVAL except for
             // impossibly large files (> 2^64-1 - 2^32-1).
             const amt = try os.copy_file_range(fd_in, offset, fd_out, offset, math.maxInt(u32), 0);
-            // Terminate when no data was copied
+            // Terminate as soon as we have copied size bytes or no bytes
+            if (maybe_size) |s| {
+                if (s == amt) break :cfr_loop;
+            }
             if (amt == 0) break :cfr_loop;
             offset += amt;
         }
@@ -3077,7 +3080,10 @@ fn copy_file(fd_in: os.fd_t, fd_out: os.fd_t) CopyFileRawError!void {
     var offset: u64 = 0;
     sendfile_loop: while (true) {
         const amt = try os.sendfile(fd_out, fd_in, offset, 0, &empty_iovec, &empty_iovec, 0);
-        // Terminate when no data was copied
+        // Terminate as soon as we have copied size bytes or no bytes
+        if (maybe_size) |s| {
+            if (s == amt) break :sendfile_loop;
+        }
         if (amt == 0) break :sendfile_loop;
         offset += amt;
     }