Commit 30b8b29f88

Andrew Kelley <andrew@ziglang.org>
2022-10-31 00:51:13
re-apply "Fix C include files not being in `whole` cache (#11365)"
This reverts commit 06310e3d4eb47fed88b175891cb5865bb050f020, reapplying commit a430630002bf02162ccbf8d3eb10fd73e490cefd. I deeply apologize to @moosichu and those affected by this bug. The original fix was actually fine. When I reverted it, I misremembered how the Cache API works. I thought the fix was going to introduce nondeterminism into the hash, but I forgot that the order of files in the manifest doesn't actually matter when checking for a cache hit. Actually, it does matter a little bit. This fix has a subtle downside which is that it does introduce the possibility of false negatives when checking for cache hits of 2+ iterations ago. For example, if the code goes from "foo", to "bar", and then back to "foo", it may look like a cache miss when it should have been a hit because 2 iterations ago the code was the same. However, this is an uncommon use case, and all it does is cause a bit of wasted time and disk space. That said, my suggestion from earlier still applies and would be a nice follow-up enhancement to this fix: The proper solution to this is to, in whole cache mode, append the hash inputs to some data structure, and then after the compilation is complete, do some kind of sorting on the hash inputs so that they will be the same order every time, then apply them in sequence. No lock on the Cache object is needed for this scheme. closes #11063
1 parent a5c96c4
src/Cache.zig
@@ -690,7 +690,7 @@ pub const Manifest = struct {
         while (true) {
             switch (it.next() orelse return) {
                 .target, .target_must_resolve => return,
-                .prereq => |bytes| try self.addFilePost(bytes),
+                .prereq => |file_path| try self.addFilePost(file_path),
                 else => |err| {
                     try err.printError(error_buf.writer());
                     log.err("failed parsing {s}: {s}", .{ dep_file_basename, error_buf.items });
src/Compilation.zig
@@ -48,6 +48,7 @@ bin_file: *link.File,
 c_object_table: std.AutoArrayHashMapUnmanaged(*CObject, void) = .{},
 /// This is a pointer to a local variable inside `update()`.
 whole_cache_manifest: ?*Cache.Manifest = null,
+whole_cache_manifest_mutex: std.Thread.Mutex = .{},
 
 link_error_flags: link.File.ErrorFlags = .{},
 
@@ -2199,8 +2200,8 @@ pub fn update(comp: *Compilation) !void {
         // We are about to obtain this lock, so here we give other processes a chance first.
         comp.bin_file.releaseLock();
 
-        comp.whole_cache_manifest = &man;
         man = comp.cache_parent.obtain();
+        comp.whole_cache_manifest = &man;
         try comp.addNonIncrementalStuffToCacheManifest(&man);
 
         const is_hit = man.hit() catch |err| {
@@ -3598,6 +3599,8 @@ pub fn cImport(comp: *Compilation, c_src: []const u8) !CImportResult {
         const dep_basename = std.fs.path.basename(out_dep_path);
         try man.addDepFilePost(zig_cache_tmp_dir, dep_basename);
         if (comp.whole_cache_manifest) |whole_cache_manifest| {
+            comp.whole_cache_manifest_mutex.lock();
+            defer comp.whole_cache_manifest_mutex.unlock();
             try whole_cache_manifest.addDepFilePost(zig_cache_tmp_dir, dep_basename);
         }
 
@@ -4060,6 +4063,11 @@ fn updateCObject(comp: *Compilation, c_object: *CObject, c_obj_prog_node: *std.P
             const dep_basename = std.fs.path.basename(dep_file_path);
             // Add the files depended on to the cache system.
             try man.addDepFilePost(zig_cache_tmp_dir, dep_basename);
+            if (comp.whole_cache_manifest) |whole_cache_manifest| {
+                comp.whole_cache_manifest_mutex.lock();
+                defer comp.whole_cache_manifest_mutex.unlock();
+                try whole_cache_manifest.addDepFilePost(zig_cache_tmp_dir, dep_basename);
+            }
             // Just to save disk space, we delete the file because it is never needed again.
             zig_cache_tmp_dir.deleteFile(dep_basename) catch |err| {
                 log.warn("failed to delete '{s}': {s}", .{ dep_file_path, @errorName(err) });
src/Module.zig
@@ -4523,7 +4523,7 @@ pub fn semaFile(mod: *Module, file: *File) SemaError!void {
             error.AnalysisFail => {},
         }
 
-        if (mod.comp.whole_cache_manifest) |man| {
+        if (mod.comp.whole_cache_manifest) |whole_cache_manifest| {
             const source = file.getSource(gpa) catch |err| {
                 try reportRetryableFileError(mod, file, "unable to load source: {s}", .{@errorName(err)});
                 return error.AnalysisFail;
@@ -4541,7 +4541,9 @@ pub fn semaFile(mod: *Module, file: *File) SemaError!void {
             };
             errdefer gpa.free(resolved_path);
 
-            try man.addFilePostContents(resolved_path, source.bytes, source.stat);
+            mod.comp.whole_cache_manifest_mutex.lock();
+            defer mod.comp.whole_cache_manifest_mutex.unlock();
+            try whole_cache_manifest.addFilePostContents(resolved_path, source.bytes, source.stat);
         }
     } else {
         new_decl.analysis = .file_failure;
@@ -5036,10 +5038,12 @@ pub fn embedFile(mod: *Module, cur_file: *File, rel_file_path: []const u8) !*Emb
         resolved_root_path, resolved_path, sub_file_path, rel_file_path,
     });
 
-    if (mod.comp.whole_cache_manifest) |man| {
+    if (mod.comp.whole_cache_manifest) |whole_cache_manifest| {
         const copied_resolved_path = try gpa.dupe(u8, resolved_path);
         errdefer gpa.free(copied_resolved_path);
-        try man.addFilePostContents(copied_resolved_path, bytes, stat);
+        mod.comp.whole_cache_manifest_mutex.lock();
+        defer mod.comp.whole_cache_manifest_mutex.unlock();
+        try whole_cache_manifest.addFilePostContents(copied_resolved_path, bytes, stat);
     }
 
     keep_resolved_path = true; // It's now owned by embed_table.
src/stage1.zig
@@ -459,10 +459,11 @@ export fn stage2_fetch_file(
     const comp = @intToPtr(*Compilation, stage1.userdata);
     const file_path = path_ptr[0..path_len];
     const max_file_size = std.math.maxInt(u32);
-    const contents = if (comp.whole_cache_manifest) |man|
-        man.addFilePostFetch(file_path, max_file_size) catch return null
-    else
-        std.fs.cwd().readFileAlloc(comp.gpa, file_path, max_file_size) catch return null;
+    const contents = if (comp.whole_cache_manifest) |man| blk: {
+        comp.whole_cache_manifest_mutex.lock();
+        defer comp.whole_cache_manifest_mutex.unlock();
+        break :blk man.addFilePostFetch(file_path, max_file_size) catch return null;
+    } else std.fs.cwd().readFileAlloc(comp.gpa, file_path, max_file_size) catch return null;
     result_len.* = contents.len;
     // TODO https://github.com/ziglang/zig/issues/3328#issuecomment-716749475
     if (contents.len == 0) return @intToPtr(?[*]const u8, 0x1);