   1//! Manages `zig-cache` directories.
   2//! This is not a general-purpose cache. It is designed to be fast and simple,
   3//! not to withstand attacks using specially-crafted input.
   4
   5const Cache = @This();
   6const builtin = @import("builtin");
   7
   8const std = @import("std");
   9const Io = std.Io;
  10const crypto = std.crypto;
  11const fs = std.fs;
  12const assert = std.debug.assert;
  13const testing = std.testing;
  14const mem = std.mem;
  15const fmt = std.fmt;
  16const Allocator = std.mem.Allocator;
  17const log = std.log.scoped(.cache);
  18
  19gpa: Allocator,
  20io: Io,
  21manifest_dir: fs.Dir,
  22hash: HashHelper = .{},
  23/// This value is accessed from multiple threads, protected by mutex.
  24recent_problematic_timestamp: Io.Timestamp = .zero,
  25mutex: Io.Mutex = .init,
  26
  27/// A set of strings such as the zig library directory or project source root, which
  28/// are stripped from the file paths before putting into the cache. They
  29/// are replaced with single-character indicators. This is not to save
  30/// space but to eliminate absolute file paths. This improves portability
  31/// and usefulness of the cache for advanced use cases.
  32prefixes_buffer: [4]Directory = undefined,
  33prefixes_len: usize = 0,
  34
  35pub const Path = @import("Cache/Path.zig");
  36pub const Directory = @import("Cache/Directory.zig");
  37pub const DepTokenizer = @import("Cache/DepTokenizer.zig");
  38
  39pub fn addPrefix(cache: *Cache, directory: Directory) void {
  40    cache.prefixes_buffer[cache.prefixes_len] = directory;
  41    cache.prefixes_len += 1;
  42}
  43
  44/// Be sure to call `Manifest.deinit` after successful initialization.
  45pub fn obtain(cache: *Cache) Manifest {
  46    return .{
  47        .cache = cache,
  48        .hash = cache.hash,
  49        .manifest_file = null,
  50        .manifest_dirty = false,
  51        .hex_digest = undefined,
  52    };
  53}
  54
  55pub fn prefixes(cache: *const Cache) []const Directory {
  56    return cache.prefixes_buffer[0..cache.prefixes_len];
  57}
  58
  59const PrefixedPath = struct {
  60    prefix: u8,
  61    sub_path: []const u8,
  62
  63    fn eql(a: PrefixedPath, b: PrefixedPath) bool {
  64        return a.prefix == b.prefix and std.mem.eql(u8, a.sub_path, b.sub_path);
  65    }
  66
  67    fn hash(pp: PrefixedPath) u32 {
  68        return @truncate(std.hash.Wyhash.hash(pp.prefix, pp.sub_path));
  69    }
  70};
  71
  72fn findPrefix(cache: *const Cache, file_path: []const u8) !PrefixedPath {
  73    const gpa = cache.gpa;
  74    const resolved_path = try fs.path.resolve(gpa, &.{file_path});
  75    errdefer gpa.free(resolved_path);
  76    return findPrefixResolved(cache, resolved_path);
  77}
  78
  79/// Takes ownership of `resolved_path` on success.
  80fn findPrefixResolved(cache: *const Cache, resolved_path: []u8) !PrefixedPath {
  81    const gpa = cache.gpa;
  82    const prefixes_slice = cache.prefixes();
  83    var i: u8 = 1; // Start at 1 to skip over checking the null prefix.
  84    while (i < prefixes_slice.len) : (i += 1) {
  85        const p = prefixes_slice[i].path.?;
  86        const sub_path = getPrefixSubpath(gpa, p, resolved_path) catch |err| switch (err) {
  87            error.NotASubPath => continue,
  88            else => |e| return e,
  89        };
  90        // Free the resolved path since we're not going to return it
  91        gpa.free(resolved_path);
  92        return PrefixedPath{
  93            .prefix = i,
  94            .sub_path = sub_path,
  95        };
  96    }
  97
  98    return PrefixedPath{
  99        .prefix = 0,
 100        .sub_path = resolved_path,
 101    };
 102}
 103
 104fn getPrefixSubpath(allocator: Allocator, prefix: []const u8, path: []u8) ![]u8 {
 105    const relative = try fs.path.relative(allocator, prefix, path);
 106    errdefer allocator.free(relative);
 107    var component_iterator = fs.path.NativeComponentIterator.init(relative);
 108    if (component_iterator.root() != null) {
 109        return error.NotASubPath;
 110    }
 111    const first_component = component_iterator.first();
 112    if (first_component != null and std.mem.eql(u8, first_component.?.name, "..")) {
 113        return error.NotASubPath;
 114    }
 115    return relative;
 116}
 117
/// This is 128 bits - Even with 2^54 cache entries, the probability of a collision would be under 10^-6
pub const bin_digest_len = 16;
pub const hex_digest_len = bin_digest_len * 2;
pub const BinDigest = [bin_digest_len]u8;
pub const HexDigest = [hex_digest_len]u8;

/// This is currently just an arbitrary non-empty string that can't match another manifest line.
const manifest_header = "0";
/// Upper bound on how many bytes of a manifest file will be read (100 MiB).
const manifest_file_size_max = 100 * 1024 * 1024;
 127
/// The type used for hashing file contents. Currently, this is SipHash128(1, 3), because it
/// provides enough collision resistance for the Manifest use cases, while being one of our
/// fastest options right now.
pub const Hasher = crypto.auth.siphash.SipHash128(1, 3);

/// Initial state with random bytes, that can be copied.
/// Refresh this with new random bytes when the manifest
/// format is modified in a non-backwards-compatible way.
/// (These bytes are the SipHash key; changing them changes every digest,
/// which effectively invalidates all existing manifests.)
pub const hasher_init: Hasher = Hasher.init(&.{
    0x33, 0x52, 0xa2, 0x84,
    0xcf, 0x17, 0x56, 0x57,
    0x01, 0xbb, 0xcd, 0xe4,
    0x77, 0xd6, 0xf0, 0x60,
});
 142
/// Bookkeeping for a single input file tracked by a `Manifest`.
pub const File = struct {
    prefixed_path: PrefixedPath,
    /// Upper bound on how much of this file may be loaded into memory;
    /// `null` means the contents are not loaded at all (see `addFilePath`).
    max_file_size: ?usize,
    /// Populated if the user calls `addOpenedFile`.
    /// The handle is not owned here.
    handle: ?fs.File,
    /// Set to `undefined` when the file is first added; populated when the
    /// manifest is checked (see `hit`).
    stat: Stat,
    /// Digest of this file's contents; `undefined` until populated during `hit`.
    bin_digest: BinDigest,
    /// Loaded file contents, if `max_file_size` permitted loading them.
    contents: ?[]const u8,

    /// Subset of `fs.File.Stat` used to cheaply detect file modification.
    pub const Stat = struct {
        inode: fs.File.INode,
        size: u64,
        mtime: Io.Timestamp,

        /// Copies the relevant fields out of a full filesystem stat result.
        pub fn fromFs(fs_stat: fs.File.Stat) Stat {
            return .{
                .inode = fs_stat.inode,
                .size = fs_stat.size,
                .mtime = fs_stat.mtime,
            };
        }
    };

    /// Frees memory owned by this entry (the sub path and any loaded
    /// contents). Does not close `handle`, which is not owned here.
    pub fn deinit(self: *File, gpa: Allocator) void {
        gpa.free(self.prefixed_path.sub_path);
        if (self.contents) |contents| {
            gpa.free(contents);
            self.contents = null;
        }
        self.* = undefined;
    }

    /// Raises the stored size limit to at least `new_max_size`;
    /// a `null` argument leaves the limit unchanged.
    pub fn updateMaxSize(file: *File, new_max_size: ?usize) void {
        const new = new_max_size orelse return;
        file.max_file_size = if (file.max_file_size) |old| @max(old, new) else new;
    }

    /// Replaces the stored handle unless `new_handle` is `null`.
    pub fn updateHandle(file: *File, new_handle: ?fs.File) void {
        const handle = new_handle orelse return;
        file.handle = handle;
    }
};
 186
/// Incrementally builds the manifest digest from a sequence of typed inputs.
/// Thin wrapper around `Hasher` that length-prefixes byte slices so adjacent
/// inputs cannot alias each other. The order of `add*` calls is significant.
pub const HashHelper = struct {
    hasher: Hasher = hasher_init,

    /// Record a slice of bytes as a dependency of the process being cached.
    pub fn addBytes(hh: *HashHelper, bytes: []const u8) void {
        // Length-prefix with the native byte representation of `usize`,
        // so digests depend on host pointer width/endianness.
        hh.hasher.update(mem.asBytes(&bytes.len));
        hh.hasher.update(bytes);
    }

    /// Like `addBytes`, but first records whether the value is present.
    pub fn addOptionalBytes(hh: *HashHelper, optional_bytes: ?[]const u8) void {
        hh.add(optional_bytes != null);
        hh.addBytes(optional_bytes orelse return);
    }

    /// Records the list length followed by each element.
    pub fn addListOfBytes(hh: *HashHelper, list_of_bytes: []const []const u8) void {
        hh.add(list_of_bytes.len);
        for (list_of_bytes) |bytes| hh.addBytes(bytes);
    }

    /// Like `addListOfBytes`, but first records whether the list is present.
    pub fn addOptionalListOfBytes(hh: *HashHelper, optional_list_of_bytes: ?[]const []const u8) void {
        hh.add(optional_list_of_bytes != null);
        hh.addListOfBytes(optional_list_of_bytes orelse return);
    }

    /// Convert the input value into bytes and record it as a dependency of the process being cached.
    pub fn add(hh: *HashHelper, x: anytype) void {
        switch (@TypeOf(x)) {
            // Structured types are decomposed field-by-field so that the
            // digest does not depend on padding or field layout.
            std.SemanticVersion => {
                hh.add(x.major);
                hh.add(x.minor);
                hh.add(x.patch);
            },
            std.Target.Os.TaggedVersionRange => {
                switch (x) {
                    .hurd => |hurd| {
                        hh.add(hurd.range.min);
                        hh.add(hurd.range.max);
                        hh.add(hurd.glibc);
                    },
                    .linux => |linux| {
                        hh.add(linux.range.min);
                        hh.add(linux.range.max);
                        hh.add(linux.glibc);
                        hh.add(linux.android);
                    },
                    .windows => |windows| {
                        hh.add(windows.min);
                        hh.add(windows.max);
                    },
                    .semver => |semver| {
                        hh.add(semver.min);
                        hh.add(semver.max);
                    },
                    .none => {},
                }
            },
            std.zig.BuildId => switch (x) {
                .none, .fast, .uuid, .sha1, .md5 => hh.add(std.meta.activeTag(x)),
                .hexstring => |hex_string| hh.addBytes(hex_string.toSlice()),
            },
            // Plain value types are hashed via their in-memory bytes.
            else => switch (@typeInfo(@TypeOf(x))) {
                .bool, .int, .@"enum", .array => hh.addBytes(mem.asBytes(&x)),
                else => @compileError("unable to hash type " ++ @typeName(@TypeOf(x))),
            },
        }
    }

    /// Records presence, then the value itself when non-null.
    pub fn addOptional(hh: *HashHelper, optional: anytype) void {
        hh.add(optional != null);
        hh.add(optional orelse return);
    }

    /// Returns a hex encoded hash of the inputs, without modifying state.
    pub fn peek(hh: HashHelper) [hex_digest_len]u8 {
        var copy = hh;
        return copy.final();
    }

    /// Returns the binary digest of the inputs, without modifying state.
    pub fn peekBin(hh: HashHelper) BinDigest {
        var copy = hh;
        var bin_digest: BinDigest = undefined;
        copy.hasher.final(&bin_digest);
        return bin_digest;
    }

    /// Returns a hex encoded hash of the inputs, mutating the state of the hasher.
    pub fn final(hh: *HashHelper) HexDigest {
        var bin_digest: BinDigest = undefined;
        hh.hasher.final(&bin_digest);
        return binToHex(bin_digest);
    }

    /// Hashes `bytes` alone (no length prefix) with a fresh hasher and
    /// returns the hex digest.
    pub fn oneShot(bytes: []const u8) [hex_digest_len]u8 {
        var hasher: Hasher = hasher_init;
        hasher.update(bytes);
        var bin_digest: BinDigest = undefined;
        hasher.final(&bin_digest);
        return binToHex(bin_digest);
    }
};
 287
 288pub fn binToHex(bin_digest: BinDigest) HexDigest {
 289    var out_digest: HexDigest = undefined;
 290    var w: std.Io.Writer = .fixed(&out_digest);
 291    w.printHex(&bin_digest, .lower) catch unreachable;
 292    return out_digest;
 293}
 294
 295pub const Lock = struct {
 296    manifest_file: fs.File,
 297
 298    pub fn release(lock: *Lock) void {
 299        if (builtin.os.tag == .windows) {
 300            // Windows does not guarantee that locks are immediately unlocked when
 301            // the file handle is closed. See LockFileEx documentation.
 302            lock.manifest_file.unlock();
 303        }
 304
 305        lock.manifest_file.close();
 306        lock.* = undefined;
 307    }
 308};
 309
 310pub const Manifest = struct {
 311    cache: *Cache,
 312    /// Current state for incremental hashing.
 313    hash: HashHelper,
 314    manifest_file: ?fs.File,
 315    manifest_dirty: bool,
 316    /// Set this flag to true before calling hit() in order to indicate that
 317    /// upon a cache hit, the code using the cache will not modify the files
 318    /// within the cache directory. This allows multiple processes to utilize
 319    /// the same cache directory at the same time.
 320    want_shared_lock: bool = true,
 321    have_exclusive_lock: bool = false,
 322    // Indicate that we want isProblematicTimestamp to perform a filesystem write in
 323    // order to obtain a problematic timestamp for the next call. Calls after that
 324    // will then use the same timestamp, to avoid unnecessary filesystem writes.
 325    want_refresh_timestamp: bool = true,
 326    files: Files = .{},
 327    hex_digest: HexDigest,
 328    diagnostic: Diagnostic = .none,
 329    /// Keeps track of the last time we performed a file system write to observe
 330    /// what time the file system thinks it is, according to its own granularity.
 331    recent_problematic_timestamp: Io.Timestamp = .zero,
 332
    /// Records the most recent failure encountered while checking the cache,
    /// so that callers receiving `error.CacheCheckFailed` can report details.
    pub const Diagnostic = union(enum) {
        none,
        manifest_create: fs.File.OpenError,
        manifest_read: fs.File.ReadError,
        manifest_lock: fs.File.LockError,
        file_open: FileOp,
        file_stat: FileOp,
        file_read: FileOp,
        file_hash: FileOp,

        /// Identifies which tracked file an operation failed on.
        pub const FileOp = struct {
            /// Index into `files`.
            file_index: usize,
            err: anyerror,
        };
    };
 348
    /// Ordered set of tracked input files. The `File` itself is the key;
    /// identity is determined solely by `File.prefixed_path` (see `FilesContext`).
    pub const Files = std.ArrayHashMapUnmanaged(File, void, FilesContext, false);

    /// Hash/equality context for `Files`, delegating to `PrefixedPath`.
    pub const FilesContext = struct {
        pub fn hash(fc: FilesContext, file: File) u32 {
            _ = fc;
            return file.prefixed_path.hash();
        }

        pub fn eql(fc: FilesContext, a: File, b: File, b_index: usize) bool {
            _ = fc;
            _ = b_index;
            return a.prefixed_path.eql(b.prefixed_path);
        }
    };

    /// Adapter allowing lookups in `Files` keyed by a bare `PrefixedPath`,
    /// without constructing a full `File` entry first.
    const FilesAdapter = struct {
        pub fn eql(context: @This(), a: PrefixedPath, b: File, b_index: usize) bool {
            _ = context;
            _ = b_index;
            return a.eql(b.prefixed_path);
        }

        pub fn hash(context: @This(), key: PrefixedPath) u32 {
            _ = context;
            return key.hash();
        }
    };
 376
 377    /// Add a file as a dependency of process being cached. When `hit` is
 378    /// called, the file's contents will be checked to ensure that it matches
 379    /// the contents from previous times.
 380    ///
 381    /// Max file size will be used to determine the amount of space the file contents
 382    /// are allowed to take up in memory. If max_file_size is null, then the contents
 383    /// will not be loaded into memory.
 384    ///
 385    /// Returns the index of the entry in the `files` array list. You can use it
 386    /// to access the contents of the file after calling `hit()` like so:
 387    ///
 388    /// ```
 389    /// var file_contents = cache_hash.files.keys()[file_index].contents.?;
 390    /// ```
 391    pub fn addFilePath(m: *Manifest, file_path: Path, max_file_size: ?usize) !usize {
 392        return addOpenedFile(m, file_path, null, max_file_size);
 393    }
 394
 395    /// Same as `addFilePath` except the file has already been opened.
 396    pub fn addOpenedFile(m: *Manifest, path: Path, handle: ?fs.File, max_file_size: ?usize) !usize {
 397        const gpa = m.cache.gpa;
 398        try m.files.ensureUnusedCapacity(gpa, 1);
 399        const resolved_path = try fs.path.resolve(gpa, &.{
 400            path.root_dir.path orelse ".",
 401            path.subPathOrDot(),
 402        });
 403        errdefer gpa.free(resolved_path);
 404        const prefixed_path = try m.cache.findPrefixResolved(resolved_path);
 405        return addFileInner(m, prefixed_path, handle, max_file_size);
 406    }
 407
 408    /// Deprecated; use `addFilePath`.
 409    pub fn addFile(self: *Manifest, file_path: []const u8, max_file_size: ?usize) !usize {
 410        assert(self.manifest_file == null);
 411
 412        const gpa = self.cache.gpa;
 413        try self.files.ensureUnusedCapacity(gpa, 1);
 414        const prefixed_path = try self.cache.findPrefix(file_path);
 415        errdefer gpa.free(prefixed_path.sub_path);
 416
 417        return addFileInner(self, prefixed_path, null, max_file_size);
 418    }
 419
 420    fn addFileInner(self: *Manifest, prefixed_path: PrefixedPath, handle: ?fs.File, max_file_size: ?usize) usize {
 421        const gop = self.files.getOrPutAssumeCapacityAdapted(prefixed_path, FilesAdapter{});
 422        if (gop.found_existing) {
 423            self.cache.gpa.free(prefixed_path.sub_path);
 424            gop.key_ptr.updateMaxSize(max_file_size);
 425            gop.key_ptr.updateHandle(handle);
 426            return gop.index;
 427        }
 428        gop.key_ptr.* = .{
 429            .prefixed_path = prefixed_path,
 430            .contents = null,
 431            .max_file_size = max_file_size,
 432            .stat = undefined,
 433            .bin_digest = undefined,
 434            .handle = handle,
 435        };
 436
 437        self.hash.add(prefixed_path.prefix);
 438        self.hash.addBytes(prefixed_path.sub_path);
 439
 440        return gop.index;
 441    }
 442
 443    /// Deprecated, use `addOptionalFilePath`.
 444    pub fn addOptionalFile(self: *Manifest, optional_file_path: ?[]const u8) !void {
 445        self.hash.add(optional_file_path != null);
 446        const file_path = optional_file_path orelse return;
 447        _ = try self.addFile(file_path, null);
 448    }
 449
 450    pub fn addOptionalFilePath(self: *Manifest, optional_file_path: ?Path) !void {
 451        self.hash.add(optional_file_path != null);
 452        const file_path = optional_file_path orelse return;
 453        _ = try self.addFilePath(file_path, null);
 454    }
 455
 456    pub fn addListOfFiles(self: *Manifest, list_of_files: []const []const u8) !void {
 457        self.hash.add(list_of_files.len);
 458        for (list_of_files) |file_path| {
 459            _ = try self.addFile(file_path, null);
 460        }
 461    }
 462
 463    pub fn addDepFile(self: *Manifest, dir: fs.Dir, dep_file_sub_path: []const u8) !void {
 464        assert(self.manifest_file == null);
 465        return self.addDepFileMaybePost(dir, dep_file_sub_path);
 466    }
 467
    /// Errors that `hit` can return.
    pub const HitError = error{
        /// Unable to check the cache for a reason that has been recorded into
        /// the `diagnostic` field.
        CacheCheckFailed,
        /// A cache manifest file exists however it could not be parsed.
        InvalidFormat,
        OutOfMemory,
        Canceled,
    };
 477
    /// Check the cache to see if the input exists in it. If it exists, returns `true`.
    /// A hex encoding of its hash is available by calling `final`.
    ///
    /// This function will also acquire an exclusive lock to the manifest file. This means
    /// that a process holding a Manifest will block any other process attempting to
    /// acquire the lock. If `want_shared_lock` is `true`, a cache hit guarantees the
    /// manifest file to be locked in shared mode, and a cache miss guarantees the manifest
    /// file to be locked in exclusive mode.
    ///
    /// The lock on the manifest file is released when `deinit` is called. As another
    /// option, one may call `toOwnedLock` to obtain a smaller object which can represent
    /// the lock. `deinit` is safe to call whether or not `toOwnedLock` has been called.
    pub fn hit(self: *Manifest) HitError!bool {
        assert(self.manifest_file == null);

        self.diagnostic = .none;

        // The manifest file is named after the hex digest of the inputs
        // added so far, with a ".txt" extension.
        const ext = ".txt";
        var manifest_file_path: [hex_digest_len + ext.len]u8 = undefined;

        var bin_digest: BinDigest = undefined;
        self.hash.hasher.final(&bin_digest);

        self.hex_digest = binToHex(bin_digest);

        @memcpy(manifest_file_path[0..self.hex_digest.len], &self.hex_digest);
        manifest_file_path[hex_digest_len..][0..ext.len].* = ext.*;

        // We'll try to open the cache with an exclusive lock, but if that would block
        // and `want_shared_lock` is set, a shared lock might be sufficient, so we'll
        // open with a shared lock instead.
        while (true) {
            if (self.cache.manifest_dir.createFile(&manifest_file_path, .{
                .read = true,
                .truncate = false,
                .lock = .exclusive,
                .lock_nonblocking = self.want_shared_lock,
            })) |manifest_file| {
                self.manifest_file = manifest_file;
                self.have_exclusive_lock = true;
                break;
            } else |err| switch (err) {
                error.WouldBlock => {
                    // Another process holds the lock; fall back to a blocking
                    // shared-lock open of the existing manifest.
                    self.manifest_file = self.cache.manifest_dir.openFile(&manifest_file_path, .{
                        .mode = .read_write,
                        .lock = .shared,
                    }) catch |e| {
                        self.diagnostic = .{ .manifest_create = e };
                        return error.CacheCheckFailed;
                    };
                    break;
                },
                error.FileNotFound => {
                    // There are no dir components, so the only possibility
                    // should be that the directory behind the handle has been
                    // deleted, however we have observed on macOS two processes
                    // racing to do openat() with O_CREAT manifest in ENOENT.
                    //
                    // As a workaround, we retry with exclusive=true which
                    // disambiguates by returning EEXIST, indicating original
                    // failure was a race, or ENOENT, indicating deletion of
                    // the directory of our open handle.
                    if (!builtin.os.tag.isDarwin()) {
                        self.diagnostic = .{ .manifest_create = error.FileNotFound };
                        return error.CacheCheckFailed;
                    }

                    if (self.cache.manifest_dir.createFile(&manifest_file_path, .{
                        .read = true,
                        .truncate = false,
                        .lock = .exclusive,
                        .lock_nonblocking = self.want_shared_lock,
                        .exclusive = true,
                    })) |manifest_file| {
                        self.manifest_file = manifest_file;
                        self.have_exclusive_lock = true;
                        break;
                    } else |excl_err| switch (excl_err) {
                        error.WouldBlock, error.PathAlreadyExists => continue,
                        error.FileNotFound => {
                            self.diagnostic = .{ .manifest_create = error.FileNotFound };
                            return error.CacheCheckFailed;
                        },
                        error.Canceled => return error.Canceled,
                        else => |e| {
                            self.diagnostic = .{ .manifest_create = e };
                            return error.CacheCheckFailed;
                        },
                    }
                },
                error.Canceled => return error.Canceled,
                else => |e| {
                    self.diagnostic = .{ .manifest_create = e };
                    return error.CacheCheckFailed;
                },
            }
        }

        self.want_refresh_timestamp = true;

        const input_file_count = self.files.entries.len;

        // We're going to construct a second hash. Its input will begin with the digest we've
        // already computed (`bin_digest`), and then it'll have the digests of each input file,
        // including "post" files (see `addFilePost`). If this is a hit, we learn the set of "post"
        // files from the manifest on disk. If this is a miss, we'll learn those from future calls
        // to `addFilePost` etc. As such, the state of `self.hash.hasher` after this function
        // depends on whether this is a hit or a miss.
        //
        // If we return `true` indicating a cache hit, then `self.hash.hasher` must already include
        // the digests of the "post" files, so the caller can call `final`. Otherwise, on a cache
        // miss, `self.hash.hasher` will include the digests of all non-"post" files -- that is,
        // the ones we've already been told about. The rest will be discovered through calls to
        // `addFilePost` etc, which will update the hasher. After all files are added, the user can
        // use `final`, and will at some point `writeManifest` the file list to disk.

        self.hash.hasher = hasher_init;
        self.hash.hasher.update(&bin_digest);

        // Breaking to `hit` means a cache hit; falling out of the bottom of
        // this labeled block means a guaranteed miss.
        hit: {
            const file_digests_populated: usize = digests: {
                switch (try self.hitWithCurrentLock()) {
                    .hit => break :hit,
                    .miss => |m| if (!try self.upgradeToExclusiveLock()) {
                        break :digests m.file_digests_populated;
                    },
                }
                // We've just had a miss with the shared lock, and upgraded to an exclusive lock. Someone
                // else might have modified the digest, so we need to check again before deciding to miss.
                // Before trying again, we must reset `self.hash.hasher` and `self.files`.
                // This is basically just the first half of `unhit`.
                self.hash.hasher = hasher_init;
                self.hash.hasher.update(&bin_digest);
                while (self.files.count() != input_file_count) {
                    var file = self.files.pop().?;
                    file.key.deinit(self.cache.gpa);
                }
                switch (try self.hitWithCurrentLock()) {
                    .hit => break :hit,
                    .miss => |m| break :digests m.file_digests_populated,
                }
            };

            // This is a guaranteed cache miss. We're almost ready to return `false`, but there's a
            // little bookkeeping to do first. The first `file_digests_populated` entries in `files`
            // have their `bin_digest` populated; there may be some left in `input_file_count` which
            // we'll need to populate ourselves. Other than that, this is basically `unhit`.
            self.manifest_dirty = true;
            self.hash.hasher = hasher_init;
            self.hash.hasher.update(&bin_digest);
            while (self.files.count() != input_file_count) {
                var file = self.files.pop().?;
                file.key.deinit(self.cache.gpa);
            }
            for (self.files.keys(), 0..) |*file, idx| {
                if (idx < file_digests_populated) {
                    // `bin_digest` is already populated by `hitWithCurrentLock`, so we can use it directly.
                    self.hash.hasher.update(&file.bin_digest);
                } else {
                    self.populateFileHash(file) catch |err| {
                        self.diagnostic = .{ .file_hash = .{
                            .file_index = idx,
                            .err = err,
                        } };
                        return error.CacheCheckFailed;
                    };
                }
            }
            return false;
        }

        // Cache hit: optionally relax the exclusive lock so other processes
        // can share the cached result.
        if (self.want_shared_lock) {
            self.downgradeToSharedLock() catch |err| {
                self.diagnostic = .{ .manifest_lock = err };
                return error.CacheCheckFailed;
            };
        }

        return true;
    }
 658
 659    /// Assumes that `self.hash.hasher` has been updated only with the original digest and that
 660    /// `self.files` contains only the original input files.
 661    fn hitWithCurrentLock(self: *Manifest) HitError!union(enum) {
 662        hit,
 663        miss: struct {
 664            file_digests_populated: usize,
 665        },
 666    } {
 667        const gpa = self.cache.gpa;
 668        const io = self.cache.io;
 669        const input_file_count = self.files.entries.len;
 670        var tiny_buffer: [1]u8 = undefined; // allows allocRemaining to detect limit exceeded
 671        var manifest_reader = self.manifest_file.?.reader(io, &tiny_buffer); // Reads positionally from zero.
 672        const limit: std.Io.Limit = .limited(manifest_file_size_max);
 673        const file_contents = manifest_reader.interface.allocRemaining(gpa, limit) catch |err| switch (err) {
 674            error.OutOfMemory => return error.OutOfMemory,
 675            error.StreamTooLong => return error.OutOfMemory,
 676            error.ReadFailed => {
 677                self.diagnostic = .{ .manifest_read = manifest_reader.err.? };
 678                return error.CacheCheckFailed;
 679            },
 680        };
 681        defer gpa.free(file_contents);
 682
 683        var any_file_changed = false;
 684        var line_iter = mem.tokenizeScalar(u8, file_contents, '\n');
 685        var idx: usize = 0;
 686        const header_valid = valid: {
 687            const line = line_iter.next() orelse break :valid false;
 688            break :valid std.mem.eql(u8, line, manifest_header);
 689        };
 690        if (!header_valid) {
 691            return .{ .miss = .{ .file_digests_populated = 0 } };
 692        }
 693        while (line_iter.next()) |line| {
 694            defer idx += 1;
 695
 696            var iter = mem.tokenizeScalar(u8, line, ' ');
 697            const size = iter.next() orelse return error.InvalidFormat;
 698            const inode = iter.next() orelse return error.InvalidFormat;
 699            const mtime_nsec_str = iter.next() orelse return error.InvalidFormat;
 700            const digest_str = iter.next() orelse return error.InvalidFormat;
 701            const prefix_str = iter.next() orelse return error.InvalidFormat;
 702            const file_path = iter.rest();
 703
 704            const stat_size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat;
 705            const stat_inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat;
 706            const stat_mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;
 707            const file_bin_digest = b: {
 708                if (digest_str.len != hex_digest_len) return error.InvalidFormat;
 709                var bd: BinDigest = undefined;
 710                _ = fmt.hexToBytes(&bd, digest_str) catch return error.InvalidFormat;
 711                break :b bd;
 712            };
 713
 714            const prefix = fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidFormat;
 715            if (prefix >= self.cache.prefixes_len) return error.InvalidFormat;
 716
 717            if (file_path.len == 0) return error.InvalidFormat;
 718
 719            const cache_hash_file = f: {
 720                const prefixed_path: PrefixedPath = .{
 721                    .prefix = prefix,
 722                    .sub_path = file_path, // expires with file_contents
 723                };
 724                if (idx < input_file_count) {
 725                    const file = &self.files.keys()[idx];
 726                    if (!file.prefixed_path.eql(prefixed_path))
 727                        return error.InvalidFormat;
 728
 729                    file.stat = .{
 730                        .size = stat_size,
 731                        .inode = stat_inode,
 732                        .mtime = .{ .nanoseconds = stat_mtime },
 733                    };
 734                    file.bin_digest = file_bin_digest;
 735                    break :f file;
 736                }
 737                const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
 738                errdefer _ = self.files.pop();
 739                if (!gop.found_existing) {
 740                    gop.key_ptr.* = .{
 741                        .prefixed_path = .{
 742                            .prefix = prefix,
 743                            .sub_path = try gpa.dupe(u8, file_path),
 744                        },
 745                        .contents = null,
 746                        .max_file_size = null,
 747                        .handle = null,
 748                        .stat = .{
 749                            .size = stat_size,
 750                            .inode = stat_inode,
 751                            .mtime = .{ .nanoseconds = stat_mtime },
 752                        },
 753                        .bin_digest = file_bin_digest,
 754                    };
 755                }
 756                break :f gop.key_ptr;
 757            };
 758
 759            const pp = cache_hash_file.prefixed_path;
 760            const dir = self.cache.prefixes()[pp.prefix].handle;
 761            const this_file = dir.openFile(pp.sub_path, .{ .mode = .read_only }) catch |err| switch (err) {
 762                error.FileNotFound => {
 763                    // Every digest before this one has been populated successfully.
 764                    return .{ .miss = .{ .file_digests_populated = idx } };
 765                },
 766                error.Canceled => return error.Canceled,
 767                else => |e| {
 768                    self.diagnostic = .{ .file_open = .{
 769                        .file_index = idx,
 770                        .err = e,
 771                    } };
 772                    return error.CacheCheckFailed;
 773                },
 774            };
 775            defer this_file.close();
 776
 777            const actual_stat = this_file.stat() catch |err| {
 778                self.diagnostic = .{ .file_stat = .{
 779                    .file_index = idx,
 780                    .err = err,
 781                } };
 782                return error.CacheCheckFailed;
 783            };
 784            const size_match = actual_stat.size == cache_hash_file.stat.size;
 785            const mtime_match = actual_stat.mtime.nanoseconds == cache_hash_file.stat.mtime.nanoseconds;
 786            const inode_match = actual_stat.inode == cache_hash_file.stat.inode;
 787
 788            if (!size_match or !mtime_match or !inode_match) {
 789                cache_hash_file.stat = .{
 790                    .size = actual_stat.size,
 791                    .mtime = actual_stat.mtime,
 792                    .inode = actual_stat.inode,
 793                };
 794
 795                if (try self.isProblematicTimestamp(cache_hash_file.stat.mtime)) {
 796                    // The actual file has an unreliable timestamp, force it to be hashed
 797                    cache_hash_file.stat.mtime = .zero;
 798                    cache_hash_file.stat.inode = 0;
 799                }
 800
 801                var actual_digest: BinDigest = undefined;
 802                hashFile(this_file, &actual_digest) catch |err| {
 803                    self.diagnostic = .{ .file_read = .{
 804                        .file_index = idx,
 805                        .err = err,
 806                    } };
 807                    return error.CacheCheckFailed;
 808                };
 809
 810                if (!mem.eql(u8, &cache_hash_file.bin_digest, &actual_digest)) {
 811                    cache_hash_file.bin_digest = actual_digest;
 812                    // keep going until we have the input file digests
 813                    any_file_changed = true;
 814                }
 815            }
 816
 817            if (!any_file_changed) {
 818                self.hash.hasher.update(&cache_hash_file.bin_digest);
 819            }
 820        }
 821
 822        // If the manifest was somehow missing one of our input files, or if any file hash has changed,
 823        // then this is a cache miss. However, we have successfully populated some or all of the file
 824        // digests.
 825        if (any_file_changed or idx < input_file_count) {
 826            return .{ .miss = .{ .file_digests_populated = idx } };
 827        }
 828
 829        return .hit;
 830    }
 831
 832    /// Reset `self.hash.hasher` to the state it should be in after `hit` returns `false`.
 833    /// The hasher contains the original input digest, and all original input file digests (i.e.
 834    /// not including post files).
 835    /// Assumes that `bin_digest` is populated for all files up to `input_file_count`. As such,
 836    /// this is not necessarily safe to call within `hit`.
 837    pub fn unhit(self: *Manifest, bin_digest: BinDigest, input_file_count: usize) void {
 838        // Reset the hash.
 839        self.hash.hasher = hasher_init;
 840        self.hash.hasher.update(&bin_digest);
 841
 842        // Remove files not in the initial hash.
 843        while (self.files.count() != input_file_count) {
 844            var file = self.files.pop().?;
 845            file.key.deinit(self.cache.gpa);
 846        }
 847
 848        for (self.files.keys()) |file| {
 849            self.hash.hasher.update(&file.bin_digest);
 850        }
 851    }
 852
    /// Returns whether `timestamp` is too recent to be trusted for change
    /// detection: a file whose mtime is at (or past) the filesystem's current
    /// tick could be modified again without its mtime advancing. Caches the
    /// most recent known "now" both per-Manifest and globally on the Cache to
    /// avoid filesystem access on the fast path.
    /// Only returns `error.Canceled`; on any other filesystem failure the
    /// timestamp is conservatively reported as problematic.
    fn isProblematicTimestamp(man: *Manifest, timestamp: Io.Timestamp) error{Canceled}!bool {
        const io = man.cache.io;

        // If the file_time is prior to the most recent problematic timestamp
        // then we don't need to access the filesystem.
        if (timestamp.nanoseconds < man.recent_problematic_timestamp.nanoseconds)
            return false;

        // Next we will check the globally shared Cache timestamp, which is accessed
        // from multiple threads.
        try man.cache.mutex.lock(io);
        defer man.cache.mutex.unlock(io);

        // Save the global one to our local one to avoid locking next time.
        man.recent_problematic_timestamp = man.cache.recent_problematic_timestamp;
        if (timestamp.nanoseconds < man.recent_problematic_timestamp.nanoseconds)
            return false;

        // This flag prevents multiple filesystem writes for the same hit() call.
        if (man.want_refresh_timestamp) {
            man.want_refresh_timestamp = false;

            // Touch a scratch file to learn the filesystem's current timestamp.
            var file = man.cache.manifest_dir.createFile("timestamp", .{
                .read = true,
                .truncate = true,
            }) catch |err| switch (err) {
                error.Canceled => return error.Canceled,
                // Cannot determine "now"; treat the timestamp as problematic.
                else => return true,
            };
            defer file.close();

            // Save locally and also save globally (we still hold the global lock).
            const stat = file.stat() catch |err| switch (err) {
                error.Canceled => return error.Canceled,
                // Same conservative fallback as above.
                else => return true,
            };
            man.recent_problematic_timestamp = stat.mtime;
            man.cache.recent_problematic_timestamp = man.recent_problematic_timestamp;
        }

        return timestamp.nanoseconds >= man.recent_problematic_timestamp.nanoseconds;
    }
 895
 896    fn populateFileHash(self: *Manifest, ch_file: *File) !void {
 897        if (ch_file.handle) |handle| {
 898            return populateFileHashHandle(self, ch_file, handle);
 899        } else {
 900            const pp = ch_file.prefixed_path;
 901            const dir = self.cache.prefixes()[pp.prefix].handle;
 902            const handle = try dir.openFile(pp.sub_path, .{});
 903            defer handle.close();
 904            return populateFileHashHandle(self, ch_file, handle);
 905        }
 906    }
 907
    /// Stats `handle` into `ch_file.stat`, hashes the file's contents into
    /// `ch_file.bin_digest`, and feeds that digest into the manifest's running
    /// hasher. When `ch_file.max_file_size` is set, the contents are also
    /// retained in `ch_file.contents` (gpa-allocated, owned by the manifest),
    /// and `error.FileTooBig` is returned if the file exceeds the limit.
    fn populateFileHashHandle(self: *Manifest, ch_file: *File, handle: fs.File) !void {
        const actual_stat = try handle.stat();
        ch_file.stat = .{
            .size = actual_stat.size,
            .mtime = actual_stat.mtime,
            .inode = actual_stat.inode,
        };

        if (try self.isProblematicTimestamp(ch_file.stat.mtime)) {
            // The actual file has an unreliable timestamp, force it to be hashed
            ch_file.stat.mtime = .zero;
            ch_file.stat.inode = 0;
        }

        if (ch_file.max_file_size) |max_file_size| {
            if (ch_file.stat.size > max_file_size) {
                return error.FileTooBig;
            }

            const contents = try self.cache.gpa.alloc(u8, @as(usize, @intCast(ch_file.stat.size)));
            errdefer self.cache.gpa.free(contents);

            // Hash while reading from disk, to keep the contents in the cpu cache while
            // doing hashing.
            var hasher = hasher_init;
            var off: usize = 0;
            while (true) {
                // NOTE(review): if the file shrinks between stat and pread, the tail
                // of `contents` stays undefined — presumably concurrent modification
                // is out of scope here; confirm against callers.
                const bytes_read = try handle.pread(contents[off..], off);
                if (bytes_read == 0) break;
                hasher.update(contents[off..][0..bytes_read]);
                off += bytes_read;
            }
            hasher.final(&ch_file.bin_digest);

            ch_file.contents = contents;
        } else {
            // No contents requested: stream-hash without allocating.
            try hashFile(handle, &ch_file.bin_digest);
        }

        self.hash.hasher.update(&ch_file.bin_digest);
    }
 949
 950    /// Add a file as a dependency of process being cached, after the initial hash has been
 951    /// calculated. This is useful for processes that don't know all the files that
 952    /// are depended on ahead of time. For example, a source file that can import other files
 953    /// will need to be recompiled if the imported file is changed.
 954    pub fn addFilePostFetch(self: *Manifest, file_path: []const u8, max_file_size: usize) ![]const u8 {
 955        assert(self.manifest_file != null);
 956
 957        const gpa = self.cache.gpa;
 958        const prefixed_path = try self.cache.findPrefix(file_path);
 959        errdefer gpa.free(prefixed_path.sub_path);
 960
 961        const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
 962        errdefer _ = self.files.pop();
 963
 964        if (gop.found_existing) {
 965            gpa.free(prefixed_path.sub_path);
 966            return gop.key_ptr.contents.?;
 967        }
 968
 969        gop.key_ptr.* = .{
 970            .prefixed_path = prefixed_path,
 971            .max_file_size = max_file_size,
 972            .stat = undefined,
 973            .bin_digest = undefined,
 974            .contents = null,
 975        };
 976
 977        self.files.lockPointers();
 978        defer self.files.unlockPointers();
 979
 980        try self.populateFileHash(gop.key_ptr);
 981        return gop.key_ptr.contents.?;
 982    }
 983
 984    /// Add a file as a dependency of process being cached, after the initial hash has been
 985    /// calculated.
 986    ///
 987    /// This is useful for processes that don't know the all the files that are
 988    /// depended on ahead of time. For example, a source file that can import
 989    /// other files will need to be recompiled if the imported file is changed.
 990    pub fn addFilePost(self: *Manifest, file_path: []const u8) !void {
 991        assert(self.manifest_file != null);
 992
 993        const gpa = self.cache.gpa;
 994        const prefixed_path = try self.cache.findPrefix(file_path);
 995        errdefer gpa.free(prefixed_path.sub_path);
 996
 997        const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
 998        errdefer _ = self.files.pop();
 999
1000        if (gop.found_existing) {
1001            gpa.free(prefixed_path.sub_path);
1002            return;
1003        }
1004
1005        gop.key_ptr.* = .{
1006            .prefixed_path = prefixed_path,
1007            .max_file_size = null,
1008            .handle = null,
1009            .stat = undefined,
1010            .bin_digest = undefined,
1011            .contents = null,
1012        };
1013
1014        self.files.lockPointers();
1015        defer self.files.unlockPointers();
1016
1017        try self.populateFileHash(gop.key_ptr);
1018    }
1019
1020    /// Like `addFilePost` but when the file contents have already been loaded from disk.
1021    pub fn addFilePostContents(
1022        self: *Manifest,
1023        file_path: []const u8,
1024        bytes: []const u8,
1025        stat: File.Stat,
1026    ) !void {
1027        assert(self.manifest_file != null);
1028        const gpa = self.cache.gpa;
1029
1030        const prefixed_path = try self.cache.findPrefix(file_path);
1031        errdefer gpa.free(prefixed_path.sub_path);
1032
1033        const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
1034        errdefer _ = self.files.pop();
1035
1036        if (gop.found_existing) {
1037            gpa.free(prefixed_path.sub_path);
1038            return;
1039        }
1040
1041        const new_file = gop.key_ptr;
1042
1043        new_file.* = .{
1044            .prefixed_path = prefixed_path,
1045            .max_file_size = null,
1046            .handle = null,
1047            .stat = stat,
1048            .bin_digest = undefined,
1049            .contents = null,
1050        };
1051
1052        if (try self.isProblematicTimestamp(new_file.stat.mtime)) {
1053            // The actual file has an unreliable timestamp, force it to be hashed
1054            new_file.stat.mtime = .zero;
1055            new_file.stat.inode = 0;
1056        }
1057
1058        {
1059            var hasher = hasher_init;
1060            hasher.update(bytes);
1061            hasher.final(&new_file.bin_digest);
1062        }
1063
1064        self.hash.hasher.update(&new_file.bin_digest);
1065    }
1066
1067    pub fn addDepFilePost(self: *Manifest, dir: fs.Dir, dep_file_sub_path: []const u8) !void {
1068        assert(self.manifest_file != null);
1069        return self.addDepFileMaybePost(dir, dep_file_sub_path);
1070    }
1071
1072    fn addDepFileMaybePost(self: *Manifest, dir: fs.Dir, dep_file_sub_path: []const u8) !void {
1073        const gpa = self.cache.gpa;
1074        const dep_file_contents = try dir.readFileAlloc(dep_file_sub_path, gpa, .limited(manifest_file_size_max));
1075        defer gpa.free(dep_file_contents);
1076
1077        var error_buf: std.ArrayList(u8) = .empty;
1078        defer error_buf.deinit(gpa);
1079
1080        var resolve_buf: std.ArrayList(u8) = .empty;
1081        defer resolve_buf.deinit(gpa);
1082
1083        var it: DepTokenizer = .{ .bytes = dep_file_contents };
1084        while (it.next()) |token| {
1085            switch (token) {
1086                // We don't care about targets, we only want the prereqs
1087                // Clang is invoked in single-source mode but other programs may not
1088                .target, .target_must_resolve => {},
1089                .prereq => |file_path| if (self.manifest_file == null) {
1090                    _ = try self.addFile(file_path, null);
1091                } else try self.addFilePost(file_path),
1092                .prereq_must_resolve => {
1093                    resolve_buf.clearRetainingCapacity();
1094                    try token.resolve(gpa, &resolve_buf);
1095                    if (self.manifest_file == null) {
1096                        _ = try self.addFile(resolve_buf.items, null);
1097                    } else try self.addFilePost(resolve_buf.items);
1098                },
1099                else => |err| {
1100                    try err.printError(gpa, &error_buf);
1101                    log.err("failed parsing {s}: {s}", .{ dep_file_sub_path, error_buf.items });
1102                    return error.InvalidDepFile;
1103                },
1104            }
1105        }
1106    }
1107
1108    /// Returns a binary hash of the inputs.
1109    pub fn finalBin(self: *Manifest) BinDigest {
1110        assert(self.manifest_file != null);
1111
1112        // We don't close the manifest file yet, because we want to
1113        // keep it locked until the API user is done using it.
1114        // We also don't write out the manifest yet, because until
1115        // cache_release is called we still might be working on creating
1116        // the artifacts to cache.
1117
1118        var bin_digest: BinDigest = undefined;
1119        self.hash.hasher.final(&bin_digest);
1120        return bin_digest;
1121    }
1122
1123    /// Returns a hex encoded hash of the inputs.
1124    pub fn final(self: *Manifest) HexDigest {
1125        const bin_digest = self.finalBin();
1126        return binToHex(bin_digest);
1127    }
1128
1129    /// If `want_shared_lock` is true, this function automatically downgrades the
1130    /// lock from exclusive to shared.
1131    pub fn writeManifest(self: *Manifest) !void {
1132        assert(self.have_exclusive_lock);
1133
1134        const manifest_file = self.manifest_file.?;
1135        if (self.manifest_dirty) {
1136            self.manifest_dirty = false;
1137
1138            var buffer: [4000]u8 = undefined;
1139            var fw = manifest_file.writer(&buffer);
1140            writeDirtyManifestToStream(self, &fw) catch |err| switch (err) {
1141                error.WriteFailed => return fw.err.?,
1142                else => |e| return e,
1143            };
1144        }
1145
1146        if (self.want_shared_lock) {
1147            try self.downgradeToSharedLock();
1148        }
1149    }
1150
1151    fn writeDirtyManifestToStream(self: *Manifest, fw: *fs.File.Writer) !void {
1152        try fw.interface.writeAll(manifest_header ++ "\n");
1153        for (self.files.keys()) |file| {
1154            try fw.interface.print("{d} {d} {d} {x} {d} {s}\n", .{
1155                file.stat.size,
1156                file.stat.inode,
1157                file.stat.mtime,
1158                &file.bin_digest,
1159                file.prefixed_path.prefix,
1160                file.prefixed_path.sub_path,
1161            });
1162        }
1163        try fw.end();
1164    }
1165
1166    fn downgradeToSharedLock(self: *Manifest) !void {
1167        if (!self.have_exclusive_lock) return;
1168
1169        // WASI does not currently support flock, so we bypass it here.
1170        // TODO: If/when flock is supported on WASI, this check should be removed.
1171        //       See https://github.com/WebAssembly/wasi-filesystem/issues/2
1172        if (builtin.os.tag != .wasi or std.process.can_spawn or !builtin.single_threaded) {
1173            const manifest_file = self.manifest_file.?;
1174            try manifest_file.downgradeLock();
1175        }
1176
1177        self.have_exclusive_lock = false;
1178    }
1179
    /// Trades the shared manifest lock for an exclusive one. Returns true when
    /// an upgrade actually occurred; during the upgrade the lock is briefly
    /// released entirely, so the caller must assume other processes may have
    /// modified the manifest in the meantime.
    fn upgradeToExclusiveLock(self: *Manifest) error{CacheCheckFailed}!bool {
        if (self.have_exclusive_lock) return false;
        assert(self.manifest_file != null);

        // WASI does not currently support flock, so we bypass it here.
        // TODO: If/when flock is supported on WASI, this check should be removed.
        //       See https://github.com/WebAssembly/wasi-filesystem/issues/2
        if (builtin.os.tag != .wasi or std.process.can_spawn or !builtin.single_threaded) {
            const manifest_file = self.manifest_file.?;
            // Here we intentionally have a period where the lock is released, in case there are
            // other processes holding a shared lock.
            manifest_file.unlock();
            manifest_file.lock(.exclusive) catch |err| {
                self.diagnostic = .{ .manifest_lock = err };
                return error.CacheCheckFailed;
            };
        }
        self.have_exclusive_lock = true;
        return true;
    }
1200
1201    /// Obtain only the data needed to maintain a lock on the manifest file.
1202    /// The `Manifest` remains safe to deinit.
1203    /// Don't forget to call `writeManifest` before this!
1204    pub fn toOwnedLock(self: *Manifest) Lock {
1205        const lock: Lock = .{
1206            .manifest_file = self.manifest_file.?,
1207        };
1208
1209        self.manifest_file = null;
1210        return lock;
1211    }
1212
1213    /// Releases the manifest file and frees any memory the Manifest was using.
1214    /// `Manifest.hit` must be called first.
1215    /// Don't forget to call `writeManifest` before this!
1216    pub fn deinit(self: *Manifest) void {
1217        if (self.manifest_file) |file| {
1218            if (builtin.os.tag == .windows) {
1219                // See Lock.release for why this is required on Windows
1220                file.unlock();
1221            }
1222
1223            file.close();
1224        }
1225        for (self.files.keys()) |*file| {
1226            file.deinit(self.cache.gpa);
1227        }
1228        self.files.deinit(self.cache.gpa);
1229    }
1230
1231    pub fn populateFileSystemInputs(man: *Manifest, buf: *std.ArrayList(u8)) Allocator.Error!void {
1232        assert(@typeInfo(std.zig.Server.Message.PathPrefix).@"enum".fields.len == man.cache.prefixes_len);
1233        buf.clearRetainingCapacity();
1234        const gpa = man.cache.gpa;
1235        const files = man.files.keys();
1236        if (files.len > 0) {
1237            for (files) |file| {
1238                try buf.ensureUnusedCapacity(gpa, file.prefixed_path.sub_path.len + 2);
1239                buf.appendAssumeCapacity(file.prefixed_path.prefix + 1);
1240                buf.appendSliceAssumeCapacity(file.prefixed_path.sub_path);
1241                buf.appendAssumeCapacity(0);
1242            }
1243            // The null byte is a separator, not a terminator.
1244            buf.items.len -= 1;
1245        }
1246    }
1247
    /// Copies every input file tracked by `man` into `other`, remapping each
    /// prefix index through `prefix_map` and feeding each copied digest into
    /// `other`'s running hasher. Paths already present in `other` are skipped.
    /// Requires that exactly 4 prefixes are registered.
    pub fn populateOtherManifest(man: *Manifest, other: *Manifest, prefix_map: [4]u8) Allocator.Error!void {
        const gpa = other.cache.gpa;
        assert(@typeInfo(std.zig.Server.Message.PathPrefix).@"enum".fields.len == man.cache.prefixes_len);
        assert(man.cache.prefixes_len == 4);
        for (man.files.keys()) |file| {
            const prefixed_path: PrefixedPath = .{
                .prefix = prefix_map[file.prefixed_path.prefix],
                .sub_path = try gpa.dupe(u8, file.prefixed_path.sub_path),
            };
            errdefer gpa.free(prefixed_path.sub_path);

            const gop = try other.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
            errdefer _ = other.files.pop();

            if (gop.found_existing) {
                // Already present in `other`; drop the duplicated path copy.
                gpa.free(prefixed_path.sub_path);
                continue;
            }

            gop.key_ptr.* = .{
                .prefixed_path = prefixed_path,
                .max_file_size = file.max_file_size,
                .handle = file.handle,
                .stat = file.stat,
                .bin_digest = file.bin_digest,
                .contents = null,
            };

            other.hash.hasher.update(&gop.key_ptr.bin_digest);
        }
    }
1279};
1280
/// On operating systems that support symlinks, does a readlink. On other
/// operating systems, uses the file contents. Windows supports symlinks but
/// only with elevated privileges, so it is treated as not supporting symlinks.
pub fn readSmallFile(dir: fs.Dir, sub_path: []const u8, buffer: []u8) ![]u8 {
    return switch (builtin.os.tag) {
        .windows => dir.readFile(sub_path, buffer),
        else => dir.readLink(sub_path, buffer),
    };
}
1291
/// On operating systems that support symlinks, does a symlink. On other
/// operating systems, uses the file contents. Windows supports symlinks but
/// only with elevated privileges, so it is treated as not supporting symlinks.
/// `data` must be a valid UTF-8 encoded file path and 255 bytes or fewer.
pub fn writeSmallFile(dir: fs.Dir, sub_path: []const u8, data: []const u8) !void {
    assert(data.len <= 255);
    return switch (builtin.os.tag) {
        .windows => dir.writeFile(.{ .sub_path = sub_path, .data = data }),
        else => dir.symLink(data, sub_path, .{}),
    };
}
1304
/// Hashes the entire contents of `file` into `bin_digest`, reading via pread
/// so the file's seek position is left untouched.
fn hashFile(file: fs.File, bin_digest: *[Hasher.mac_length]u8) fs.File.PReadError!void {
    var hasher = hasher_init;
    var buf: [1024]u8 = undefined;
    var offset: u64 = 0;
    while (true) {
        const n = try file.pread(&buf, offset);
        if (n == 0) break;
        hasher.update(buf[0..n]);
        offset += n;
    }
    hasher.final(bin_digest);
}
1317
/// Creates (and on exit deletes) a scratch file and returns its stat mtime,
/// i.e. a sample of the filesystem's current file-timestamp value.
fn testGetCurrentFileTimestamp(dir: fs.Dir) !Io.Timestamp {
    const scratch_name = "test-filetimestamp.tmp";

    var file = try dir.createFile(scratch_name, .{ .read = true, .truncate = true });
    defer {
        file.close();
        dir.deleteFile(scratch_name) catch {};
    }

    return (try file.stat()).mtime;
}
1333
// End-to-end round trip: a miss populates the manifest, and a second identical
// run with the same inputs is a hit producing the same digest.
test "cache file and then recall it" {
    const io = std.testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();

    const temp_file = "test.txt";
    const temp_manifest_dir = "temp_manifest_dir";

    try tmp.dir.writeFile(.{ .sub_path = temp_file, .data = "Hello, world!\n" });

    // Wait for file timestamps to tick
    // (so the input file's mtime is not flagged as a problematic, too-recent timestamp).
    const initial_time = try testGetCurrentFileTimestamp(tmp.dir);
    while ((try testGetCurrentFileTimestamp(tmp.dir)).nanoseconds == initial_time.nanoseconds) {
        try std.Io.Clock.Duration.sleep(.{ .clock = .boot, .raw = .fromNanoseconds(1) }, io);
    }

    var digest1: HexDigest = undefined;
    var digest2: HexDigest = undefined;

    {
        var cache: Cache = .{
            .io = io,
            .gpa = testing.allocator,
            .manifest_dir = try tmp.dir.makeOpenPath(temp_manifest_dir, .{}),
        };
        cache.addPrefix(.{ .path = null, .handle = tmp.dir });
        defer cache.manifest_dir.close();

        {
            var ch = cache.obtain();
            defer ch.deinit();

            // Mix several value kinds into the hash along with the file input.
            ch.hash.add(true);
            ch.hash.add(@as(u16, 1234));
            ch.hash.addBytes("1234");
            _ = try ch.addFile(temp_file, null);

            // There should be nothing in the cache
            try testing.expectEqual(false, try ch.hit());

            digest1 = ch.final();
            try ch.writeManifest();
        }
        {
            var ch = cache.obtain();
            defer ch.deinit();

            // Identical inputs to the first run.
            ch.hash.add(true);
            ch.hash.add(@as(u16, 1234));
            ch.hash.addBytes("1234");
            _ = try ch.addFile(temp_file, null);

            // Cache hit! We just "built" the same file
            try testing.expect(try ch.hit());
            digest2 = ch.final();

            // A hit should have downgraded to (or kept) a shared lock.
            try testing.expectEqual(false, ch.have_exclusive_lock);
        }

        try testing.expectEqual(digest1, digest2);
    }
}
1397
// Modifying an input file between runs must turn the second run into a miss
// and change the resulting digest.
test "check that changing a file makes cache fail" {
    const io = std.testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();

    const temp_file = "cache_hash_change_file_test.txt";
    const temp_manifest_dir = "cache_hash_change_file_manifest_dir";
    const original_temp_file_contents = "Hello, world!\n";
    const updated_temp_file_contents = "Hello, world; but updated!\n";

    try tmp.dir.writeFile(.{ .sub_path = temp_file, .data = original_temp_file_contents });

    // Wait for file timestamps to tick
    // (so the input file's mtime is not flagged as a problematic, too-recent timestamp).
    const initial_time = try testGetCurrentFileTimestamp(tmp.dir);
    while ((try testGetCurrentFileTimestamp(tmp.dir)).nanoseconds == initial_time.nanoseconds) {
        try std.Io.Clock.Duration.sleep(.{ .clock = .boot, .raw = .fromNanoseconds(1) }, io);
    }

    var digest1: HexDigest = undefined;
    var digest2: HexDigest = undefined;

    {
        var cache: Cache = .{
            .io = io,
            .gpa = testing.allocator,
            .manifest_dir = try tmp.dir.makeOpenPath(temp_manifest_dir, .{}),
        };
        cache.addPrefix(.{ .path = null, .handle = tmp.dir });
        defer cache.manifest_dir.close();

        {
            var ch = cache.obtain();
            defer ch.deinit();

            ch.hash.addBytes("1234");
            // max_file_size of 100 asks the cache to retain the file contents.
            const temp_file_idx = try ch.addFile(temp_file, 100);

            // There should be nothing in the cache
            try testing.expectEqual(false, try ch.hit());

            try testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.keys()[temp_file_idx].contents.?));

            digest1 = ch.final();

            try ch.writeManifest();
        }

        // Change the dependency on disk between the two cache runs.
        try tmp.dir.writeFile(.{ .sub_path = temp_file, .data = updated_temp_file_contents });

        {
            var ch = cache.obtain();
            defer ch.deinit();

            ch.hash.addBytes("1234");
            const temp_file_idx = try ch.addFile(temp_file, 100);

            // A file that we depend on has been updated, so the cache should not contain an entry for it
            try testing.expectEqual(false, try ch.hit());

            // The cache system does not keep the contents of re-hashed input files.
            try testing.expect(ch.files.keys()[temp_file_idx].contents == null);

            digest2 = ch.final();

            try ch.writeManifest();
        }

        try testing.expect(!mem.eql(u8, digest1[0..], digest2[0..]));
    }
}
1469
// A manifest with only non-file inputs must still round-trip: miss on the
// first run, hit (with identical digest and a shared lock) on the second.
test "no file inputs" {
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();

    const temp_manifest_dir = "no_file_inputs_manifest_dir";

    var digest1: HexDigest = undefined;
    var digest2: HexDigest = undefined;

    var cache: Cache = .{
        .io = io,
        .gpa = testing.allocator,
        .manifest_dir = try tmp.dir.makeOpenPath(temp_manifest_dir, .{}),
    };
    cache.addPrefix(.{ .path = null, .handle = tmp.dir });
    defer cache.manifest_dir.close();

    // First run: nothing cached yet, so this must miss.
    {
        var man = cache.obtain();
        defer man.deinit();

        man.hash.addBytes("1234");

        try testing.expectEqual(false, try man.hit());

        digest1 = man.final();

        try man.writeManifest();
    }

    // Second run with identical inputs: must hit, holding only a shared lock.
    {
        var man = cache.obtain();
        defer man.deinit();

        man.hash.addBytes("1234");

        try testing.expect(try man.hit());
        digest2 = man.final();
        try testing.expectEqual(false, man.have_exclusive_lock);
    }

    try testing.expectEqual(digest1, digest2);
}
1515
test "Manifest with files added after initial hash work" {
    // Use the file-level `testing` alias for consistency with the sibling tests.
    const io = testing.io;

    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();

    const temp_file1 = "cache_hash_post_file_test1.txt";
    const temp_file2 = "cache_hash_post_file_test2.txt";
    const temp_manifest_dir = "cache_hash_post_file_manifest_dir";

    try tmp.dir.writeFile(.{ .sub_path = temp_file1, .data = "Hello, world!\n" });
    try tmp.dir.writeFile(.{ .sub_path = temp_file2, .data = "Hello world the second!\n" });

    // Wait for file timestamps to tick so later writes get a distinct mtime.
    const initial_time = try testGetCurrentFileTimestamp(tmp.dir);
    while ((try testGetCurrentFileTimestamp(tmp.dir)).nanoseconds == initial_time.nanoseconds) {
        try std.Io.Clock.Duration.sleep(.{ .clock = .boot, .raw = .fromNanoseconds(1) }, io);
    }

    var digest1: HexDigest = undefined;
    var digest2: HexDigest = undefined;
    var digest3: HexDigest = undefined;

    {
        var cache: Cache = .{
            .io = io,
            .gpa = testing.allocator,
            .manifest_dir = try tmp.dir.makeOpenPath(temp_manifest_dir, .{}),
        };
        cache.addPrefix(.{ .path = null, .handle = tmp.dir });
        defer cache.manifest_dir.close();

        // First run: miss, then record a dependency discovered after hashing.
        {
            var ch = cache.obtain();
            defer ch.deinit();

            ch.hash.addBytes("1234");
            _ = try ch.addFile(temp_file1, null);

            // There should be nothing in the cache
            try testing.expectEqual(false, try ch.hit());

            // Add a file dependency after the initial hash was computed.
            _ = try ch.addFilePost(temp_file2);

            digest1 = ch.final();
            try ch.writeManifest();
        }
        // Second run: the post-added file is in the manifest, so this hits.
        {
            var ch = cache.obtain();
            defer ch.deinit();

            ch.hash.addBytes("1234");
            _ = try ch.addFile(temp_file1, null);

            try testing.expect(try ch.hit());
            digest2 = ch.final();

            try testing.expectEqual(false, ch.have_exclusive_lock);
        }
        // expectEqualSlices reports the differing bytes on failure, unlike
        // expect(mem.eql(...)).
        try testing.expectEqualSlices(u8, &digest1, &digest2);

        // Modify the file added after initial hash
        try tmp.dir.writeFile(.{ .sub_path = temp_file2, .data = "Hello world the second, updated\n" });

        // Wait for file timestamps to tick
        const initial_time2 = try testGetCurrentFileTimestamp(tmp.dir);
        while ((try testGetCurrentFileTimestamp(tmp.dir)).nanoseconds == initial_time2.nanoseconds) {
            try std.Io.Clock.Duration.sleep(.{ .clock = .boot, .raw = .fromNanoseconds(1) }, io);
        }

        // Third run: the post-added dependency changed, so the hit must fail.
        {
            var ch = cache.obtain();
            defer ch.deinit();

            ch.hash.addBytes("1234");
            _ = try ch.addFile(temp_file1, null);

            // A file that we depend on has been updated, so the cache should not contain an entry for it
            try testing.expectEqual(false, try ch.hit());

            _ = try ch.addFilePost(temp_file2);

            digest3 = ch.final();

            try ch.writeManifest();
        }

        // The updated dependency contents must change the resulting digest.
        try testing.expect(!mem.eql(u8, &digest1, &digest3));
    }
}
1605}