Commit e2c741f1e7

Andrew Kelley <andrew@ziglang.org>
2020-08-18 21:44:00
std.cache_hash: additionally use file size to detect modifications
I have observed on Linux that a file can be written and read many times without the mtime changing, despite the file system having nanosecond granularity (and roughly a millisecond's worth of nanoseconds passing between modifications). I am calling this a Linux Kernel Bug and adding file size to the cache hash manifest as a mitigation. As evidence, macOS does not exhibit this behavior.

This means it is still possible, on Linux, for a file to be added to the cache hash and then updated with the same file size and same inode within about 1 millisecond, in which case the cache system will give us a false positive and report it as unmodified. I don't see any way to improve this situation without fixing the bug in the Linux kernel.

closes #6082
1 parent bdb8c49
Changed files (1)
lib/std/cache_hash.zig
@@ -188,11 +188,13 @@ pub const CacheHash = struct {
             };
 
             var iter = mem.tokenize(line, " ");
+            const size = iter.next() orelse return error.InvalidFormat;
             const inode = iter.next() orelse return error.InvalidFormat;
             const mtime_nsec_str = iter.next() orelse return error.InvalidFormat;
             const digest_str = iter.next() orelse return error.InvalidFormat;
             const file_path = iter.rest();
 
+            cache_hash_file.stat.size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat;
             cache_hash_file.stat.inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat;
             cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;
             base64_decoder.decode(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat;
@@ -216,10 +218,11 @@ pub const CacheHash = struct {
             defer this_file.close();
 
             const actual_stat = try this_file.stat();
+            const size_match = actual_stat.size == cache_hash_file.stat.size;
             const mtime_match = actual_stat.mtime == cache_hash_file.stat.mtime;
             const inode_match = actual_stat.inode == cache_hash_file.stat.inode;
 
-            if (!mtime_match or !inode_match) {
+            if (!size_match or !mtime_match or !inode_match) {
                 self.manifest_dirty = true;
 
                 cache_hash_file.stat = actual_stat;
@@ -392,7 +395,7 @@ pub const CacheHash = struct {
 
         for (self.files.items) |file| {
             base64_encoder.encode(encoded_digest[0..], &file.bin_digest);
-            try outStream.print("{} {} {} {}\n", .{ file.stat.inode, file.stat.mtime, encoded_digest[0..], file.path });
+            try outStream.print("{} {} {} {} {}\n", .{ file.stat.size, file.stat.inode, file.stat.mtime, encoded_digest[0..], file.path });
         }
 
         try self.manifest_file.?.pwriteAll(contents.items, 0);
@@ -451,17 +454,10 @@ fn isProblematicTimestamp(fs_clock: i128) bool {
     // to detect precision of seconds, because looking at the zero bits in base
     // 2 would not detect precision of the seconds value.
     const fs_sec = @intCast(i64, @divFloor(fs_clock, std.time.ns_per_s));
-    var fs_nsec = @intCast(i64, @mod(fs_clock, std.time.ns_per_s));
+    const fs_nsec = @intCast(i64, @mod(fs_clock, std.time.ns_per_s));
     var wall_sec = @intCast(i64, @divFloor(wall_clock, std.time.ns_per_s));
     var wall_nsec = @intCast(i64, @mod(wall_clock, std.time.ns_per_s));
 
-    if (std.Target.current.os.tag == .linux) {
-        // TODO As a temporary measure while we figure out how to solve
-        // https://github.com/ziglang/zig/issues/6082, we cut the granularity of nanoseconds
-        // by a large amount.
-        fs_nsec &= @as(i64, -1) << 23;
-    }
-
     // First make all the least significant zero bits in the fs_clock, also zero bits in the wall clock.
     if (fs_nsec == 0) {
         wall_nsec = 0;
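
Illustrative sketch (not part of the commit): after this change the manifest records "size inode mtime_nsec digest path" per file, and a cached entry is treated as unmodified only when all three stat fields match. The names below (CachedStat, isUnmodified) are hypothetical and are not the actual std.cache_hash API.

const std = @import("std");

const CachedStat = struct {
    size: u64,
    inode: u64,
    mtime: i64, // nanoseconds
};

fn isUnmodified(cached: CachedStat, actual: CachedStat) bool {
    // With size included, a same-inode rewrite inside the mtime granularity is
    // only missed when the new contents also happen to have the same byte count.
    return cached.size == actual.size and
        cached.inode == actual.inode and
        cached.mtime == actual.mtime;
}

pub fn main() void {
    const cached = CachedStat{ .size = 100, .inode = 42, .mtime = 123456789 };
    const rewritten = CachedStat{ .size = 101, .inode = 42, .mtime = 123456789 };
    // Same inode and mtime, different size: the entry is now correctly invalidated.
    std.debug.assert(!isUnmodified(cached, rewritten));
}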