Commit cd4397783f

Andrew Kelley <andrew@ziglang.org>
2023-10-09 05:03:40
Package.Fetch: improved deletion algorithm
Previously, every file deletion was followed by a recursive attempt to remove the parent directory. Now, while walking the directory tree, every directory that will have at least one file deleted from it is tracked in an array hash map. After all the worker threads have finished deleting and hashing, the parent thread sorts these "suspicious" directories by path length, descending, so that children appear before their parents, and then iterates over the array hash map, attempting an rmdir on each one. Whenever an rmdir succeeds, the parent directory is appended to the map so that it, too, will be removed if it has become empty.
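The pruning pass described above can be read in isolation from the rest of computeHash. Below is a minimal, self-contained sketch of that loop, assuming a Zig standard library of roughly the same vintage as this commit (API names may differ across versions). The pruneEmptyDirs wrapper and the test scaffolding are illustrative names only, and error handling is simplified to returning the error rather than reporting through the error bundle as the real code in the diff does.

```zig
const std = @import("std");
const fs = std.fs;

/// Attempt to rmdir every directory that had files deleted from it, children first.
/// Parents of successfully removed directories are appended and visited in turn.
fn pruneEmptyDirs(
    gpa: std.mem.Allocator,
    root: fs.Dir,
    sus_dirs: *std.StringArrayHashMapUnmanaged(void),
) !void {
    // Sort by path length, descending: a child's path always strictly contains
    // its parent's path, so children are visited (and removed) before parents.
    const SortContext = struct {
        keys: []const []const u8,
        pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool {
            return ctx.keys[b_index].len < ctx.keys[a_index].len;
        }
    };
    sus_dirs.sortUnstable(SortContext{ .keys = sus_dirs.keys() });

    // New entries are appended during iteration, so loop by index and re-read count().
    var i: usize = 0;
    while (i < sus_dirs.count()) : (i += 1) {
        const sus_dir = sus_dirs.keys()[i];
        root.deleteDir(sus_dir) catch |err| switch (err) {
            error.DirNotEmpty, error.FileNotFound => continue,
            else => |e| return e,
        };
        // The rmdir succeeded, so the parent may now be empty as well.
        if (fs.path.dirname(sus_dir)) |parent| {
            try sus_dirs.put(gpa, parent, {});
        }
    }
}

test "empty directories are pruned bottom-up" {
    const gpa = std.testing.allocator;

    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();

    // Pretend a/b/c lost all of its files; a/keep.txt keeps `a` non-empty.
    try tmp.dir.makePath("a/b/c");
    (try tmp.dir.createFile("a/keep.txt", .{})).close();

    var sus_dirs: std.StringArrayHashMapUnmanaged(void) = .{};
    defer sus_dirs.deinit(gpa);
    try sus_dirs.put(gpa, "a/b/c", {});

    try pruneEmptyDirs(gpa, tmp.dir, &sus_dirs);

    try std.testing.expectError(error.FileNotFound, tmp.dir.access("a/b", .{}));
    try tmp.dir.access("a/keep.txt", .{}); // `a` itself survives.
}
```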
1 parent 4a2cf38
Changed files (1)
src/Package/Fetch.zig
@@ -1249,8 +1249,13 @@ fn computeHash(
     var all_files = std.ArrayList(*HashedFile).init(gpa);
     defer all_files.deinit();
 
-    var all_deletions = std.ArrayList(*DeletedFile).init(gpa);
-    defer all_deletions.deinit();
+    var deleted_files = std.ArrayList(*DeletedFile).init(gpa);
+    defer deleted_files.deinit();
+
+    // Track directories which had any files deleted from them so that empty directories
+    // can be deleted.
+    var sus_dirs: std.StringArrayHashMapUnmanaged(void) = .{};
+    defer sus_dirs.deinit(gpa);
 
     var walker = try @as(fs.IterableDir, .{ .dir = tmp_directory.handle }).walk(gpa);
     defer walker.deinit();
@@ -1274,16 +1279,22 @@ fn computeHash(
 
             if (!filter.includePath(entry.path)) {
                 // Delete instead of including in hash calculation.
+                const fs_path = try arena.dupe(u8, entry.path);
+
+                // Also track the parent directory in case it becomes empty.
+                if (fs.path.dirname(fs_path)) |parent|
+                    try sus_dirs.put(gpa, parent, {});
+
                 const deleted_file = try arena.create(DeletedFile);
                 deleted_file.* = .{
-                    .fs_path = try arena.dupe(u8, entry.path),
+                    .fs_path = fs_path,
                     .failure = undefined, // to be populated by the worker
                 };
                 wait_group.start();
                 try thread_pool.spawn(workerDeleteFile, .{
                     tmp_directory.handle, deleted_file, &wait_group,
                 });
-                try all_deletions.append(deleted_file);
+                try deleted_files.append(deleted_file);
                 continue;
             }
 
@@ -1300,8 +1311,8 @@ fn computeHash(
             if (std.mem.eql(u8, entry.path, Package.build_zig_basename))
                 f.has_build_zig = true;
 
-            const hashed_file = try arena.create(HashedFile);
             const fs_path = try arena.dupe(u8, entry.path);
+            const hashed_file = try arena.create(HashedFile);
             hashed_file.* = .{
                 .fs_path = fs_path,
                 .normalized_path = try normalizePath(arena, fs_path),
@@ -1317,6 +1328,36 @@ fn computeHash(
         }
     }
 
+    {
+        // Sort by length, descending, so that child directories get removed first.
+        sus_dirs.sortUnstable(@as(struct {
+            keys: []const []const u8,
+            pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool {
+                return ctx.keys[b_index].len < ctx.keys[a_index].len;
+            }
+        }, .{ .keys = sus_dirs.keys() }));
+
+        // During this loop, more entries will be added, so we must loop by index.
+        var i: usize = 0;
+        while (i < sus_dirs.count()) : (i += 1) {
+            const sus_dir = sus_dirs.keys()[i];
+            tmp_directory.handle.deleteDir(sus_dir) catch |err| switch (err) {
+                error.DirNotEmpty => continue,
+                error.FileNotFound => continue,
+                else => |e| {
+                    try eb.addRootErrorMessage(.{ .msg = try eb.printString(
+                        "unable to delete empty directory '{s}': {s}",
+                        .{ sus_dir, @errorName(e) },
+                    ) });
+                    return error.FetchFailed;
+                },
+            };
+            if (fs.path.dirname(sus_dir)) |parent| {
+                try sus_dirs.put(gpa, parent, {});
+            }
+        }
+    }
+
     std.mem.sortUnstable(*HashedFile, all_files.items, {}, HashedFile.lessThan);
 
     var hasher = Manifest.Hash.init(.{});
@@ -1332,7 +1373,7 @@ fn computeHash(
         };
         hasher.update(&hashed_file.hash);
     }
-    for (all_deletions.items) |deleted_file| {
+    for (deleted_files.items) |deleted_file| {
         deleted_file.failure catch |err| {
             any_failures = true;
             try eb.addRootErrorMessage(.{
@@ -1382,16 +1423,6 @@ fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void
 
 fn deleteFileFallible(dir: fs.Dir, deleted_file: *DeletedFile) DeletedFile.Error!void {
     try dir.deleteFile(deleted_file.fs_path);
-    // In case the file was the last remaining file in the parent directory, attempt to
-    // remove the parent directory.
-    var opt_parent = fs.path.dirname(deleted_file.fs_path);
-    while (opt_parent) |parent| : (opt_parent = fs.path.dirname(parent)) {
-        dir.deleteDir(parent) catch |err| switch (err) {
-            error.DirNotEmpty => return,
-            error.FileNotFound => return,
-            else => |e| return e,
-        };
-    }
 }
 
 fn isExecutable(file: fs.File) !bool {