1//! Manages `zig-cache` directories.
2//! This is not a general-purpose cache. It is designed to be fast and simple,
3//! not to withstand attacks using specially-crafted input.
4
5const Cache = @This();
6const builtin = @import("builtin");
7
8const std = @import("std");
9const Io = std.Io;
10const crypto = std.crypto;
11const fs = std.fs;
12const assert = std.debug.assert;
13const testing = std.testing;
14const mem = std.mem;
15const fmt = std.fmt;
16const Allocator = std.mem.Allocator;
17const log = std.log.scoped(.cache);
18
19gpa: Allocator,
20io: Io,
21manifest_dir: fs.Dir,
22hash: HashHelper = .{},
23/// This value is accessed from multiple threads, protected by mutex.
24recent_problematic_timestamp: Io.Timestamp = .zero,
25mutex: Io.Mutex = .init,
26
27/// A set of strings such as the zig library directory or project source root, which
28/// are stripped from the file paths before putting into the cache. They
29/// are replaced with single-character indicators. This is not to save
30/// space but to eliminate absolute file paths. This improves portability
31/// and usefulness of the cache for advanced use cases.
32prefixes_buffer: [4]Directory = undefined,
33prefixes_len: usize = 0,
34
35pub const Path = @import("Cache/Path.zig");
36pub const Directory = @import("Cache/Directory.zig");
37pub const DepTokenizer = @import("Cache/DepTokenizer.zig");
38
/// Register `directory` as a path prefix. Subsequent file paths that fall
/// under it are stored in manifests as a prefix index plus relative sub-path
/// rather than an absolute path (see `findPrefixResolved`).
/// At most `prefixes_buffer.len` (4) prefixes may ever be added.
pub fn addPrefix(cache: *Cache, directory: Directory) void {
    // Guard the fixed-size buffer: without this, a fifth prefix would be an
    // out-of-bounds store (undefined behavior in ReleaseFast).
    assert(cache.prefixes_len < cache.prefixes_buffer.len);
    cache.prefixes_buffer[cache.prefixes_len] = directory;
    cache.prefixes_len += 1;
}
43
/// Begin a new manifest seeded with the cache's current hash state.
/// Be sure to call `Manifest.deinit` after successful initialization.
pub fn obtain(cache: *Cache) Manifest {
    const manifest: Manifest = .{
        .cache = cache,
        .hash = cache.hash,
        .manifest_file = null,
        .manifest_dirty = false,
        .hex_digest = undefined,
    };
    return manifest;
}
54
/// Returns the prefix directories registered so far via `addPrefix`.
pub fn prefixes(cache: *const Cache) []const Directory {
    const all = &cache.prefixes_buffer;
    return all[0..cache.prefixes_len];
}
58
const PrefixedPath = struct {
    prefix: u8,
    sub_path: []const u8,

    /// Two prefixed paths are equal when both the prefix index and the
    /// sub-path bytes match.
    fn eql(lhs: PrefixedPath, rhs: PrefixedPath) bool {
        if (lhs.prefix != rhs.prefix) return false;
        return std.mem.eql(u8, lhs.sub_path, rhs.sub_path);
    }

    /// Hashes the sub-path bytes with the prefix index as the seed.
    fn hash(p: PrefixedPath) u32 {
        const full: u64 = std.hash.Wyhash.hash(p.prefix, p.sub_path);
        return @truncate(full);
    }
};
71
/// Resolve `file_path` and classify it under one of the registered prefixes.
/// On success the returned `sub_path` is owned by the caller.
fn findPrefix(cache: *const Cache, file_path: []const u8) !PrefixedPath {
    const resolved = try fs.path.resolve(cache.gpa, &.{file_path});
    errdefer cache.gpa.free(resolved);
    return cache.findPrefixResolved(resolved);
}
78
/// Takes ownership of `resolved_path` on success.
fn findPrefixResolved(cache: *const Cache, resolved_path: []u8) !PrefixedPath {
    const gpa = cache.gpa;
    // Index 0 is the null prefix, so only the remaining entries are checked.
    for (cache.prefixes()[1..], 1..) |prefix_dir, i| {
        const prefix_path = prefix_dir.path.?;
        const sub_path = getPrefixSubpath(gpa, prefix_path, resolved_path) catch |err| switch (err) {
            error.NotASubPath => continue,
            else => |e| return e,
        };
        // A relative copy was made; the resolved path is no longer needed.
        gpa.free(resolved_path);
        return .{
            .prefix = @intCast(i),
            .sub_path = sub_path,
        };
    }
    // No prefix matched: fall back to the null prefix with the full path.
    return .{
        .prefix = 0,
        .sub_path = resolved_path,
    };
}
103
/// Returns `path` relative to `prefix`, or `error.NotASubPath` when `path`
/// does not lie underneath `prefix`. Caller owns the returned slice.
fn getPrefixSubpath(allocator: Allocator, prefix: []const u8, path: []u8) ![]u8 {
    const relative = try fs.path.relative(allocator, prefix, path);
    errdefer allocator.free(relative);
    var it = fs.path.NativeComponentIterator.init(relative);
    // A rooted result means the two paths share no common base.
    if (it.root() != null) return error.NotASubPath;
    // A leading ".." means `path` escapes `prefix`.
    if (it.first()) |component| {
        if (std.mem.eql(u8, component.name, "..")) return error.NotASubPath;
    }
    return relative;
}
117
/// This is 128 bits - Even with 2^54 cache entries, the probability of a collision would be under 10^-6
pub const bin_digest_len = 16;
pub const hex_digest_len = bin_digest_len * 2;
pub const BinDigest = [bin_digest_len]u8;
pub const HexDigest = [hex_digest_len]u8;

/// This is currently just an arbitrary non-empty string that can't match another manifest line.
const manifest_header = "0";
/// Manifest files larger than this are rejected when read back in `hitWithCurrentLock`.
const manifest_file_size_max = 100 * 1024 * 1024;

/// The type used for hashing file contents. Currently, this is SipHash128(1, 3), because it
/// provides enough collision resistance for the Manifest use cases, while being one of our
/// fastest options right now.
pub const Hasher = crypto.auth.siphash.SipHash128(1, 3);

/// Initial state with random bytes, that can be copied.
/// Refresh this with new random bytes when the manifest
/// format is modified in a non-backwards-compatible way.
pub const hasher_init: Hasher = Hasher.init(&.{
    0x33, 0x52, 0xa2, 0x84,
    0xcf, 0x17, 0x56, 0x57,
    0x01, 0xbb, 0xcd, 0xe4,
    0x77, 0xd6, 0xf0, 0x60,
});
142
pub const File = struct {
    prefixed_path: PrefixedPath,
    max_file_size: ?usize,
    /// Populated if the user calls `addOpenedFile`.
    /// The handle is not owned here.
    handle: ?fs.File,
    stat: Stat,
    bin_digest: BinDigest,
    contents: ?[]const u8,

    pub const Stat = struct {
        inode: fs.File.INode,
        size: u64,
        mtime: Io.Timestamp,

        /// Copies the fields the cache cares about out of a filesystem stat.
        pub fn fromFs(fs_stat: fs.File.Stat) Stat {
            return .{
                .inode = fs_stat.inode,
                .size = fs_stat.size,
                .mtime = fs_stat.mtime,
            };
        }
    };

    /// Frees the owned sub-path and any loaded contents.
    /// `handle` is not owned and therefore not closed here.
    pub fn deinit(file: *File, gpa: Allocator) void {
        gpa.free(file.prefixed_path.sub_path);
        if (file.contents) |contents| {
            gpa.free(contents);
            file.contents = null;
        }
        file.* = undefined;
    }

    /// Raises (never lowers) the stored size limit; `null` leaves it as-is.
    pub fn updateMaxSize(file: *File, new_max_size: ?usize) void {
        if (new_max_size) |new| {
            file.max_file_size = @max(file.max_file_size orelse new, new);
        }
    }

    /// Adopts a caller-provided open handle, if any.
    pub fn updateHandle(file: *File, new_handle: ?fs.File) void {
        if (new_handle) |handle| file.handle = handle;
    }
};
186
/// Incrementally builds the cache input hash. The exact sequence of bytes fed
/// to the hasher is part of the manifest format: every `add*` call hashes a
/// length or presence tag before payload bytes so distinct inputs cannot
/// collide by concatenation.
pub const HashHelper = struct {
    hasher: Hasher = hasher_init,

    /// Record a slice of bytes as a dependency of the process being cached.
    pub fn addBytes(hh: *HashHelper, bytes: []const u8) void {
        // Length first, so ("ab","c") and ("a","bc") hash differently.
        hh.hasher.update(mem.asBytes(&bytes.len));
        hh.hasher.update(bytes);
    }

    /// Records presence as a bool, then the bytes themselves when non-null.
    pub fn addOptionalBytes(hh: *HashHelper, optional_bytes: ?[]const u8) void {
        hh.add(optional_bytes != null);
        hh.addBytes(optional_bytes orelse return);
    }

    /// Records the list length followed by each entry, in order.
    pub fn addListOfBytes(hh: *HashHelper, list_of_bytes: []const []const u8) void {
        hh.add(list_of_bytes.len);
        for (list_of_bytes) |bytes| hh.addBytes(bytes);
    }

    /// Records presence as a bool, then the list when non-null.
    pub fn addOptionalListOfBytes(hh: *HashHelper, optional_list_of_bytes: ?[]const []const u8) void {
        hh.add(optional_list_of_bytes != null);
        hh.addListOfBytes(optional_list_of_bytes orelse return);
    }

    /// Convert the input value into bytes and record it as a dependency of the process being cached.
    /// Supports bools, integers, enums, arrays, and the specific std types
    /// matched below; anything else is a compile error.
    pub fn add(hh: *HashHelper, x: anytype) void {
        switch (@TypeOf(x)) {
            std.SemanticVersion => {
                hh.add(x.major);
                hh.add(x.minor);
                hh.add(x.patch);
            },
            std.Target.Os.TaggedVersionRange => {
                switch (x) {
                    .hurd => |hurd| {
                        hh.add(hurd.range.min);
                        hh.add(hurd.range.max);
                        hh.add(hurd.glibc);
                    },
                    .linux => |linux| {
                        hh.add(linux.range.min);
                        hh.add(linux.range.max);
                        hh.add(linux.glibc);
                        hh.add(linux.android);
                    },
                    .windows => |windows| {
                        hh.add(windows.min);
                        hh.add(windows.max);
                    },
                    .semver => |semver| {
                        hh.add(semver.min);
                        hh.add(semver.max);
                    },
                    .none => {},
                }
            },
            std.zig.BuildId => switch (x) {
                .none, .fast, .uuid, .sha1, .md5 => hh.add(std.meta.activeTag(x)),
                .hexstring => |hex_string| hh.addBytes(hex_string.toSlice()),
            },
            else => switch (@typeInfo(@TypeOf(x))) {
                .bool, .int, .@"enum", .array => hh.addBytes(mem.asBytes(&x)),
                else => @compileError("unable to hash type " ++ @typeName(@TypeOf(x))),
            },
        }
    }

    /// Records presence as a bool, then the value when non-null.
    pub fn addOptional(hh: *HashHelper, optional: anytype) void {
        hh.add(optional != null);
        hh.add(optional orelse return);
    }

    /// Returns a hex encoded hash of the inputs, without modifying state.
    pub fn peek(hh: HashHelper) [hex_digest_len]u8 {
        // Finalize a copy so the live hasher state is untouched.
        var copy = hh;
        return copy.final();
    }

    /// Returns a binary hash of the inputs, without modifying state.
    pub fn peekBin(hh: HashHelper) BinDigest {
        var copy = hh;
        var bin_digest: BinDigest = undefined;
        copy.hasher.final(&bin_digest);
        return bin_digest;
    }

    /// Returns a hex encoded hash of the inputs, mutating the state of the hasher.
    pub fn final(hh: *HashHelper) HexDigest {
        var bin_digest: BinDigest = undefined;
        hh.hasher.final(&bin_digest);
        return binToHex(bin_digest);
    }

    /// Hashes a single byte slice with a fresh hasher; does not touch any
    /// existing state. Note: unlike `addBytes`, no length prefix is hashed.
    pub fn oneShot(bytes: []const u8) [hex_digest_len]u8 {
        var hasher: Hasher = hasher_init;
        hasher.update(bytes);
        var bin_digest: BinDigest = undefined;
        hasher.final(&bin_digest);
        return binToHex(bin_digest);
    }
};
287
/// Encodes a binary digest as lowercase hex.
pub fn binToHex(bin_digest: BinDigest) HexDigest {
    const alphabet = "0123456789abcdef";
    var out_digest: HexDigest = undefined;
    for (bin_digest, 0..) |byte, i| {
        out_digest[2 * i + 0] = alphabet[byte >> 4];
        out_digest[2 * i + 1] = alphabet[byte & 0x0f];
    }
    return out_digest;
}
294
/// A minimal handle representing ownership of the manifest file lock,
/// obtained via `Manifest.toOwnedLock`.
pub const Lock = struct {
    manifest_file: fs.File,

    pub fn release(lock: *Lock) void {
        // Windows does not guarantee that locks are immediately unlocked when
        // the file handle is closed (see LockFileEx documentation), so the
        // lock is dropped explicitly there before closing.
        if (builtin.os.tag == .windows) lock.manifest_file.unlock();
        lock.manifest_file.close();
        lock.* = undefined;
    }
};
309
310pub const Manifest = struct {
311 cache: *Cache,
312 /// Current state for incremental hashing.
313 hash: HashHelper,
314 manifest_file: ?fs.File,
315 manifest_dirty: bool,
316 /// Set this flag to true before calling hit() in order to indicate that
317 /// upon a cache hit, the code using the cache will not modify the files
318 /// within the cache directory. This allows multiple processes to utilize
319 /// the same cache directory at the same time.
320 want_shared_lock: bool = true,
321 have_exclusive_lock: bool = false,
322 // Indicate that we want isProblematicTimestamp to perform a filesystem write in
323 // order to obtain a problematic timestamp for the next call. Calls after that
324 // will then use the same timestamp, to avoid unnecessary filesystem writes.
325 want_refresh_timestamp: bool = true,
326 files: Files = .{},
327 hex_digest: HexDigest,
328 diagnostic: Diagnostic = .none,
329 /// Keeps track of the last time we performed a file system write to observe
330 /// what time the file system thinks it is, according to its own granularity.
331 recent_problematic_timestamp: Io.Timestamp = .zero,
332
    /// Records where and how a cache check failed, for error reporting after
    /// `hit` returns `error.CacheCheckFailed`.
    pub const Diagnostic = union(enum) {
        none,
        /// Creating or opening the manifest file failed.
        manifest_create: fs.File.OpenError,
        /// Reading the manifest file contents failed.
        manifest_read: fs.File.ReadError,
        /// Downgrading/changing the manifest file lock failed.
        manifest_lock: fs.File.LockError,
        file_open: FileOp,
        file_stat: FileOp,
        file_read: FileOp,
        file_hash: FileOp,

        /// Identifies which input file (by index in `files`) an operation failed on.
        pub const FileOp = struct {
            file_index: usize,
            err: anyerror,
        };
    };
348
    /// Insertion-ordered set of input files, keyed by `File.prefixed_path`.
    pub const Files = std.ArrayHashMapUnmanaged(File, void, FilesContext, false);

    /// Hash/eql context for `Files` that delegates to `PrefixedPath`.
    pub const FilesContext = struct {
        pub fn hash(fc: FilesContext, file: File) u32 {
            _ = fc;
            return file.prefixed_path.hash();
        }

        pub fn eql(fc: FilesContext, a: File, b: File, b_index: usize) bool {
            _ = fc;
            _ = b_index;
            return a.prefixed_path.eql(b.prefixed_path);
        }
    };

    /// Adapter allowing `Files` lookups by `PrefixedPath` alone, without
    /// constructing a full `File` value.
    const FilesAdapter = struct {
        pub fn eql(context: @This(), a: PrefixedPath, b: File, b_index: usize) bool {
            _ = context;
            _ = b_index;
            return a.eql(b.prefixed_path);
        }

        pub fn hash(context: @This(), key: PrefixedPath) u32 {
            _ = context;
            return key.hash();
        }
    };
376
377 /// Add a file as a dependency of process being cached. When `hit` is
378 /// called, the file's contents will be checked to ensure that it matches
379 /// the contents from previous times.
380 ///
381 /// Max file size will be used to determine the amount of space the file contents
382 /// are allowed to take up in memory. If max_file_size is null, then the contents
383 /// will not be loaded into memory.
384 ///
385 /// Returns the index of the entry in the `files` array list. You can use it
386 /// to access the contents of the file after calling `hit()` like so:
387 ///
388 /// ```
389 /// var file_contents = cache_hash.files.keys()[file_index].contents.?;
390 /// ```
391 pub fn addFilePath(m: *Manifest, file_path: Path, max_file_size: ?usize) !usize {
392 return addOpenedFile(m, file_path, null, max_file_size);
393 }
394
395 /// Same as `addFilePath` except the file has already been opened.
396 pub fn addOpenedFile(m: *Manifest, path: Path, handle: ?fs.File, max_file_size: ?usize) !usize {
397 const gpa = m.cache.gpa;
398 try m.files.ensureUnusedCapacity(gpa, 1);
399 const resolved_path = try fs.path.resolve(gpa, &.{
400 path.root_dir.path orelse ".",
401 path.subPathOrDot(),
402 });
403 errdefer gpa.free(resolved_path);
404 const prefixed_path = try m.cache.findPrefixResolved(resolved_path);
405 return addFileInner(m, prefixed_path, handle, max_file_size);
406 }
407
408 /// Deprecated; use `addFilePath`.
409 pub fn addFile(self: *Manifest, file_path: []const u8, max_file_size: ?usize) !usize {
410 assert(self.manifest_file == null);
411
412 const gpa = self.cache.gpa;
413 try self.files.ensureUnusedCapacity(gpa, 1);
414 const prefixed_path = try self.cache.findPrefix(file_path);
415 errdefer gpa.free(prefixed_path.sub_path);
416
417 return addFileInner(self, prefixed_path, null, max_file_size);
418 }
419
    /// Inserts `prefixed_path` into `files`, taking ownership of its sub-path.
    /// If the file is already present, the duplicate sub-path is freed and the
    /// existing entry's size limit and handle are updated instead.
    /// Callers must have already reserved capacity in `files`.
    fn addFileInner(self: *Manifest, prefixed_path: PrefixedPath, handle: ?fs.File, max_file_size: ?usize) usize {
        const gop = self.files.getOrPutAssumeCapacityAdapted(prefixed_path, FilesAdapter{});
        if (gop.found_existing) {
            // Existing key keeps its own sub-path allocation; drop the new one.
            self.cache.gpa.free(prefixed_path.sub_path);
            gop.key_ptr.updateMaxSize(max_file_size);
            gop.key_ptr.updateHandle(handle);
            return gop.index;
        }
        gop.key_ptr.* = .{
            .prefixed_path = prefixed_path,
            .contents = null,
            .max_file_size = max_file_size,
            .stat = undefined,
            .bin_digest = undefined,
            .handle = handle,
        };

        // Only the file's identity is hashed here; its contents are hashed
        // later via `populateFileHash` / `hit`.
        self.hash.add(prefixed_path.prefix);
        self.hash.addBytes(prefixed_path.sub_path);

        return gop.index;
    }
442
443 /// Deprecated, use `addOptionalFilePath`.
444 pub fn addOptionalFile(self: *Manifest, optional_file_path: ?[]const u8) !void {
445 self.hash.add(optional_file_path != null);
446 const file_path = optional_file_path orelse return;
447 _ = try self.addFile(file_path, null);
448 }
449
450 pub fn addOptionalFilePath(self: *Manifest, optional_file_path: ?Path) !void {
451 self.hash.add(optional_file_path != null);
452 const file_path = optional_file_path orelse return;
453 _ = try self.addFilePath(file_path, null);
454 }
455
456 pub fn addListOfFiles(self: *Manifest, list_of_files: []const []const u8) !void {
457 self.hash.add(list_of_files.len);
458 for (list_of_files) |file_path| {
459 _ = try self.addFile(file_path, null);
460 }
461 }
462
463 pub fn addDepFile(self: *Manifest, dir: fs.Dir, dep_file_sub_path: []const u8) !void {
464 assert(self.manifest_file == null);
465 return self.addDepFileMaybePost(dir, dep_file_sub_path);
466 }
467
    pub const HitError = error{
        /// Unable to check the cache for a reason that has been recorded into
        /// the `diagnostic` field.
        CacheCheckFailed,
        /// A cache manifest file exists however it could not be parsed.
        InvalidFormat,
        /// Allocating room for manifest contents or file entries failed.
        OutOfMemory,
        /// An underlying `Io` operation was canceled.
        Canceled,
    };
477
    /// Check the cache to see if the input exists in it. If it exists, returns `true`.
    /// A hex encoding of its hash is available by calling `final`.
    ///
    /// This function will also acquire an exclusive lock to the manifest file. This means
    /// that a process holding a Manifest will block any other process attempting to
    /// acquire the lock. If `want_shared_lock` is `true`, a cache hit guarantees the
    /// manifest file to be locked in shared mode, and a cache miss guarantees the manifest
    /// file to be locked in exclusive mode.
    ///
    /// The lock on the manifest file is released when `deinit` is called. As another
    /// option, one may call `toOwnedLock` to obtain a smaller object which can represent
    /// the lock. `deinit` is safe to call whether or not `toOwnedLock` has been called.
    pub fn hit(self: *Manifest) HitError!bool {
        assert(self.manifest_file == null);

        self.diagnostic = .none;

        // The manifest file name is the hex digest of all inputs added so far.
        const ext = ".txt";
        var manifest_file_path: [hex_digest_len + ext.len]u8 = undefined;

        var bin_digest: BinDigest = undefined;
        self.hash.hasher.final(&bin_digest);

        self.hex_digest = binToHex(bin_digest);

        @memcpy(manifest_file_path[0..self.hex_digest.len], &self.hex_digest);
        manifest_file_path[hex_digest_len..][0..ext.len].* = ext.*;

        // We'll try to open the cache with an exclusive lock, but if that would block
        // and `want_shared_lock` is set, a shared lock might be sufficient, so we'll
        // open with a shared lock instead.
        while (true) {
            if (self.cache.manifest_dir.createFile(&manifest_file_path, .{
                .read = true,
                .truncate = false,
                // Only ask for a non-blocking lock when a shared fallback exists.
                .lock = .exclusive,
                .lock_nonblocking = self.want_shared_lock,
            })) |manifest_file| {
                self.manifest_file = manifest_file;
                self.have_exclusive_lock = true;
                break;
            } else |err| switch (err) {
                error.WouldBlock => {
                    self.manifest_file = self.cache.manifest_dir.openFile(&manifest_file_path, .{
                        .mode = .read_write,
                        .lock = .shared,
                    }) catch |e| {
                        self.diagnostic = .{ .manifest_create = e };
                        return error.CacheCheckFailed;
                    };
                    break;
                },
                error.FileNotFound => {
                    // There are no dir components, so the only possibility
                    // should be that the directory behind the handle has been
                    // deleted, however we have observed on macOS two processes
                    // racing to do openat() with O_CREAT manifest in ENOENT.
                    //
                    // As a workaround, we retry with exclusive=true which
                    // disambiguates by returning EEXIST, indicating original
                    // failure was a race, or ENOENT, indicating deletion of
                    // the directory of our open handle.
                    if (!builtin.os.tag.isDarwin()) {
                        self.diagnostic = .{ .manifest_create = error.FileNotFound };
                        return error.CacheCheckFailed;
                    }

                    if (self.cache.manifest_dir.createFile(&manifest_file_path, .{
                        .read = true,
                        .truncate = false,
                        .lock = .exclusive,
                        .lock_nonblocking = self.want_shared_lock,
                        .exclusive = true,
                    })) |manifest_file| {
                        self.manifest_file = manifest_file;
                        self.have_exclusive_lock = true;
                        break;
                    } else |excl_err| switch (excl_err) {
                        // Both cases mean another process won the race; retry
                        // the normal open path.
                        error.WouldBlock, error.PathAlreadyExists => continue,
                        error.FileNotFound => {
                            self.diagnostic = .{ .manifest_create = error.FileNotFound };
                            return error.CacheCheckFailed;
                        },
                        error.Canceled => return error.Canceled,
                        else => |e| {
                            self.diagnostic = .{ .manifest_create = e };
                            return error.CacheCheckFailed;
                        },
                    }
                },
                error.Canceled => return error.Canceled,
                else => |e| {
                    self.diagnostic = .{ .manifest_create = e };
                    return error.CacheCheckFailed;
                },
            }
        }

        self.want_refresh_timestamp = true;

        const input_file_count = self.files.entries.len;

        // We're going to construct a second hash. Its input will begin with the digest we've
        // already computed (`bin_digest`), and then it'll have the digests of each input file,
        // including "post" files (see `addFilePost`). If this is a hit, we learn the set of "post"
        // files from the manifest on disk. If this is a miss, we'll learn those from future calls
        // to `addFilePost` etc. As such, the state of `self.hash.hasher` after this function
        // depends on whether this is a hit or a miss.
        //
        // If we return `true` indicating a cache hit, then `self.hash.hasher` must already include
        // the digests of the "post" files, so the caller can call `final`. Otherwise, on a cache
        // miss, `self.hash.hasher` will include the digests of all non-"post" files -- that is,
        // the ones we've already been told about. The rest will be discovered through calls to
        // `addFilePost` etc, which will update the hasher. After all files are added, the user can
        // use `final`, and will at some point `writeManifest` the file list to disk.

        self.hash.hasher = hasher_init;
        self.hash.hasher.update(&bin_digest);

        hit: {
            const file_digests_populated: usize = digests: {
                switch (try self.hitWithCurrentLock()) {
                    .hit => break :hit,
                    .miss => |m| if (!try self.upgradeToExclusiveLock()) {
                        break :digests m.file_digests_populated;
                    },
                }
                // We've just had a miss with the shared lock, and upgraded to an exclusive lock. Someone
                // else might have modified the digest, so we need to check again before deciding to miss.
                // Before trying again, we must reset `self.hash.hasher` and `self.files`.
                // This is basically just the first half of `unhit`.
                self.hash.hasher = hasher_init;
                self.hash.hasher.update(&bin_digest);
                while (self.files.count() != input_file_count) {
                    var file = self.files.pop().?;
                    file.key.deinit(self.cache.gpa);
                }
                switch (try self.hitWithCurrentLock()) {
                    .hit => break :hit,
                    .miss => |m| break :digests m.file_digests_populated,
                }
            };

            // This is a guaranteed cache miss. We're almost ready to return `false`, but there's a
            // little bookkeeping to do first. The first `file_digests_populated` entries in `files`
            // have their `bin_digest` populated; there may be some left in `input_file_count` which
            // we'll need to populate ourselves. Other than that, this is basically `unhit`.
            self.manifest_dirty = true;
            self.hash.hasher = hasher_init;
            self.hash.hasher.update(&bin_digest);
            while (self.files.count() != input_file_count) {
                var file = self.files.pop().?;
                file.key.deinit(self.cache.gpa);
            }
            for (self.files.keys(), 0..) |*file, idx| {
                if (idx < file_digests_populated) {
                    // `bin_digest` is already populated by `hitWithCurrentLock`, so we can use it directly.
                    self.hash.hasher.update(&file.bin_digest);
                } else {
                    self.populateFileHash(file) catch |err| {
                        self.diagnostic = .{ .file_hash = .{
                            .file_index = idx,
                            .err = err,
                        } };
                        return error.CacheCheckFailed;
                    };
                }
            }
            return false;
        }

        if (self.want_shared_lock) {
            self.downgradeToSharedLock() catch |err| {
                self.diagnostic = .{ .manifest_lock = err };
                return error.CacheCheckFailed;
            };
        }

        return true;
    }
658
    /// Assumes that `self.hash.hasher` has been updated only with the original digest and that
    /// `self.files` contains only the original input files.
    /// Reads and parses the on-disk manifest, re-checking every listed file
    /// against the filesystem. On miss, reports how many leading entries of
    /// `files` had their `bin_digest` successfully populated.
    fn hitWithCurrentLock(self: *Manifest) HitError!union(enum) {
        hit,
        miss: struct {
            file_digests_populated: usize,
        },
    } {
        const gpa = self.cache.gpa;
        const io = self.cache.io;
        const input_file_count = self.files.entries.len;
        var tiny_buffer: [1]u8 = undefined; // allows allocRemaining to detect limit exceeded
        var manifest_reader = self.manifest_file.?.reader(io, &tiny_buffer); // Reads positionally from zero.
        const limit: std.Io.Limit = .limited(manifest_file_size_max);
        const file_contents = manifest_reader.interface.allocRemaining(gpa, limit) catch |err| switch (err) {
            error.OutOfMemory => return error.OutOfMemory,
            // An over-large manifest is treated the same as failed allocation.
            error.StreamTooLong => return error.OutOfMemory,
            error.ReadFailed => {
                self.diagnostic = .{ .manifest_read = manifest_reader.err.? };
                return error.CacheCheckFailed;
            },
        };
        defer gpa.free(file_contents);

        var any_file_changed = false;
        var line_iter = mem.tokenizeScalar(u8, file_contents, '\n');
        var idx: usize = 0;
        // A missing or mismatched header (e.g. an empty just-created manifest)
        // is a miss with zero digests populated.
        const header_valid = valid: {
            const line = line_iter.next() orelse break :valid false;
            break :valid std.mem.eql(u8, line, manifest_header);
        };
        if (!header_valid) {
            return .{ .miss = .{ .file_digests_populated = 0 } };
        }
        while (line_iter.next()) |line| {
            defer idx += 1;

            // Manifest line format: size inode mtime_nsec digest prefix sub_path
            var iter = mem.tokenizeScalar(u8, line, ' ');
            const size = iter.next() orelse return error.InvalidFormat;
            const inode = iter.next() orelse return error.InvalidFormat;
            const mtime_nsec_str = iter.next() orelse return error.InvalidFormat;
            const digest_str = iter.next() orelse return error.InvalidFormat;
            const prefix_str = iter.next() orelse return error.InvalidFormat;
            const file_path = iter.rest();

            const stat_size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat;
            const stat_inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat;
            const stat_mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;
            const file_bin_digest = b: {
                if (digest_str.len != hex_digest_len) return error.InvalidFormat;
                var bd: BinDigest = undefined;
                _ = fmt.hexToBytes(&bd, digest_str) catch return error.InvalidFormat;
                break :b bd;
            };

            const prefix = fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidFormat;
            if (prefix >= self.cache.prefixes_len) return error.InvalidFormat;

            if (file_path.len == 0) return error.InvalidFormat;

            const cache_hash_file = f: {
                const prefixed_path: PrefixedPath = .{
                    .prefix = prefix,
                    .sub_path = file_path, // expires with file_contents
                };
                if (idx < input_file_count) {
                    // The first `input_file_count` manifest lines must match
                    // the declared input files, in order.
                    const file = &self.files.keys()[idx];
                    if (!file.prefixed_path.eql(prefixed_path))
                        return error.InvalidFormat;

                    file.stat = .{
                        .size = stat_size,
                        .inode = stat_inode,
                        .mtime = .{ .nanoseconds = stat_mtime },
                    };
                    file.bin_digest = file_bin_digest;
                    break :f file;
                }
                // Lines beyond the declared inputs are "post" files learned
                // from the manifest itself.
                const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
                errdefer _ = self.files.pop();
                if (!gop.found_existing) {
                    gop.key_ptr.* = .{
                        .prefixed_path = .{
                            .prefix = prefix,
                            .sub_path = try gpa.dupe(u8, file_path),
                        },
                        .contents = null,
                        .max_file_size = null,
                        .handle = null,
                        .stat = .{
                            .size = stat_size,
                            .inode = stat_inode,
                            .mtime = .{ .nanoseconds = stat_mtime },
                        },
                        .bin_digest = file_bin_digest,
                    };
                }
                break :f gop.key_ptr;
            };

            const pp = cache_hash_file.prefixed_path;
            const dir = self.cache.prefixes()[pp.prefix].handle;
            const this_file = dir.openFile(pp.sub_path, .{ .mode = .read_only }) catch |err| switch (err) {
                error.FileNotFound => {
                    // Every digest before this one has been populated successfully.
                    return .{ .miss = .{ .file_digests_populated = idx } };
                },
                error.Canceled => return error.Canceled,
                else => |e| {
                    self.diagnostic = .{ .file_open = .{
                        .file_index = idx,
                        .err = e,
                    } };
                    return error.CacheCheckFailed;
                },
            };
            defer this_file.close();

            const actual_stat = this_file.stat() catch |err| {
                self.diagnostic = .{ .file_stat = .{
                    .file_index = idx,
                    .err = err,
                } };
                return error.CacheCheckFailed;
            };
            const size_match = actual_stat.size == cache_hash_file.stat.size;
            const mtime_match = actual_stat.mtime.nanoseconds == cache_hash_file.stat.mtime.nanoseconds;
            const inode_match = actual_stat.inode == cache_hash_file.stat.inode;

            if (!size_match or !mtime_match or !inode_match) {
                // Stat differs from the manifest; re-hash the contents to find
                // out whether the file actually changed.
                cache_hash_file.stat = .{
                    .size = actual_stat.size,
                    .mtime = actual_stat.mtime,
                    .inode = actual_stat.inode,
                };

                if (try self.isProblematicTimestamp(cache_hash_file.stat.mtime)) {
                    // The actual file has an unreliable timestamp, force it to be hashed
                    cache_hash_file.stat.mtime = .zero;
                    cache_hash_file.stat.inode = 0;
                }

                var actual_digest: BinDigest = undefined;
                hashFile(this_file, &actual_digest) catch |err| {
                    self.diagnostic = .{ .file_read = .{
                        .file_index = idx,
                        .err = err,
                    } };
                    return error.CacheCheckFailed;
                };

                if (!mem.eql(u8, &cache_hash_file.bin_digest, &actual_digest)) {
                    cache_hash_file.bin_digest = actual_digest;
                    // keep going until we have the input file digests
                    any_file_changed = true;
                }
            }

            if (!any_file_changed) {
                self.hash.hasher.update(&cache_hash_file.bin_digest);
            }
        }

        // If the manifest was somehow missing one of our input files, or if any file hash has changed,
        // then this is a cache miss. However, we have successfully populated some or all of the file
        // digests.
        if (any_file_changed or idx < input_file_count) {
            return .{ .miss = .{ .file_digests_populated = idx } };
        }

        return .hit;
    }
831
832 /// Reset `self.hash.hasher` to the state it should be in after `hit` returns `false`.
833 /// The hasher contains the original input digest, and all original input file digests (i.e.
834 /// not including post files).
835 /// Assumes that `bin_digest` is populated for all files up to `input_file_count`. As such,
836 /// this is not necessarily safe to call within `hit`.
837 pub fn unhit(self: *Manifest, bin_digest: BinDigest, input_file_count: usize) void {
838 // Reset the hash.
839 self.hash.hasher = hasher_init;
840 self.hash.hasher.update(&bin_digest);
841
842 // Remove files not in the initial hash.
843 while (self.files.count() != input_file_count) {
844 var file = self.files.pop().?;
845 file.key.deinit(self.cache.gpa);
846 }
847
848 for (self.files.keys()) |file| {
849 self.hash.hasher.update(&file.bin_digest);
850 }
851 }
852
    /// Returns true when `timestamp` is at or after the filesystem's most
    /// recently observed write time, in which case an mtime comparison cannot
    /// be trusted to detect a modification at the filesystem's granularity.
    fn isProblematicTimestamp(man: *Manifest, timestamp: Io.Timestamp) error{Canceled}!bool {
        const io = man.cache.io;

        // If the file_time is prior to the most recent problematic timestamp
        // then we don't need to access the filesystem.
        if (timestamp.nanoseconds < man.recent_problematic_timestamp.nanoseconds)
            return false;

        // Next we will check the globally shared Cache timestamp, which is accessed
        // from multiple threads.
        try man.cache.mutex.lock(io);
        defer man.cache.mutex.unlock(io);

        // Save the global one to our local one to avoid locking next time.
        man.recent_problematic_timestamp = man.cache.recent_problematic_timestamp;
        if (timestamp.nanoseconds < man.recent_problematic_timestamp.nanoseconds)
            return false;

        // This flag prevents multiple filesystem writes for the same hit() call.
        if (man.want_refresh_timestamp) {
            man.want_refresh_timestamp = false;

            // On filesystem errors (other than cancellation), conservatively
            // report the timestamp as problematic rather than failing.
            var file = man.cache.manifest_dir.createFile("timestamp", .{
                .read = true,
                .truncate = true,
            }) catch |err| switch (err) {
                error.Canceled => return error.Canceled,
                else => return true,
            };
            defer file.close();

            // Save locally and also save globally (we still hold the global lock).
            const stat = file.stat() catch |err| switch (err) {
                error.Canceled => return error.Canceled,
                else => return true,
            };
            man.recent_problematic_timestamp = stat.mtime;
            man.cache.recent_problematic_timestamp = man.recent_problematic_timestamp;
        }

        return timestamp.nanoseconds >= man.recent_problematic_timestamp.nanoseconds;
    }
895
896 fn populateFileHash(self: *Manifest, ch_file: *File) !void {
897 if (ch_file.handle) |handle| {
898 return populateFileHashHandle(self, ch_file, handle);
899 } else {
900 const pp = ch_file.prefixed_path;
901 const dir = self.cache.prefixes()[pp.prefix].handle;
902 const handle = try dir.openFile(pp.sub_path, .{});
903 defer handle.close();
904 return populateFileHashHandle(self, ch_file, handle);
905 }
906 }
907
    /// Stats, optionally loads into memory, and hashes one input file via an
    /// open handle, then folds its digest into the manifest hash.
    /// Returns `error.FileTooBig` when the file exceeds its `max_file_size`.
    fn populateFileHashHandle(self: *Manifest, ch_file: *File, handle: fs.File) !void {
        const actual_stat = try handle.stat();
        ch_file.stat = .{
            .size = actual_stat.size,
            .mtime = actual_stat.mtime,
            .inode = actual_stat.inode,
        };

        if (try self.isProblematicTimestamp(ch_file.stat.mtime)) {
            // The actual file has an unreliable timestamp, force it to be hashed
            ch_file.stat.mtime = .zero;
            ch_file.stat.inode = 0;
        }

        if (ch_file.max_file_size) |max_file_size| {
            if (ch_file.stat.size > max_file_size) {
                return error.FileTooBig;
            }

            const contents = try self.cache.gpa.alloc(u8, @as(usize, @intCast(ch_file.stat.size)));
            errdefer self.cache.gpa.free(contents);

            // Hash while reading from disk, to keep the contents in the cpu cache while
            // doing hashing.
            var hasher = hasher_init;
            var off: usize = 0;
            while (true) {
                // pread at increasing offsets until EOF (zero-length read).
                const bytes_read = try handle.pread(contents[off..], off);
                if (bytes_read == 0) break;
                hasher.update(contents[off..][0..bytes_read]);
                off += bytes_read;
            }
            hasher.final(&ch_file.bin_digest);

            ch_file.contents = contents;
        } else {
            // No size limit given: hash without retaining the contents.
            try hashFile(handle, &ch_file.bin_digest);
        }

        self.hash.hasher.update(&ch_file.bin_digest);
    }
949
    /// Add a file as a dependency of process being cached, after the initial hash has been
    /// calculated. This is useful for processes that don't know all the files that
    /// are depended on ahead of time. For example, a source file that can import other files
    /// will need to be recompiled if the imported file is changed.
    /// Loads at most `max_file_size` bytes and returns the file contents,
    /// which remain owned by this `Manifest`.
    pub fn addFilePostFetch(self: *Manifest, file_path: []const u8, max_file_size: usize) ![]const u8 {
        // Only valid after `hit` has opened the manifest file.
        assert(self.manifest_file != null);

        const gpa = self.cache.gpa;
        const prefixed_path = try self.cache.findPrefix(file_path);
        errdefer gpa.free(prefixed_path.sub_path);

        const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
        // Undo the insertion if anything below fails.
        errdefer _ = self.files.pop();

        if (gop.found_existing) {
            // Already tracked; free the duplicate path allocation.
            // NOTE(review): `.?` assumes the existing entry was loaded with
            // contents (i.e. had a max_file_size) — confirm callers guarantee
            // this.
            gpa.free(prefixed_path.sub_path);
            return gop.key_ptr.contents.?;
        }

        gop.key_ptr.* = .{
            .prefixed_path = prefixed_path,
            .max_file_size = max_file_size,
            .stat = undefined,
            .bin_digest = undefined,
            .contents = null,
        };

        // Guard against pointer invalidation while `gop.key_ptr` is held.
        self.files.lockPointers();
        defer self.files.unlockPointers();

        // Setting max_file_size above makes populateFileHash load and keep
        // the contents, so the unwrap below cannot fail on success.
        try self.populateFileHash(gop.key_ptr);
        return gop.key_ptr.contents.?;
    }
983
    /// Add a file as a dependency of process being cached, after the initial hash has been
    /// calculated.
    ///
    /// This is useful for processes that don't know all the files that are
    /// depended on ahead of time. For example, a source file that can import
    /// other files will need to be recompiled if the imported file is changed.
    pub fn addFilePost(self: *Manifest, file_path: []const u8) !void {
        // Only valid after `hit` has opened the manifest file.
        assert(self.manifest_file != null);

        const gpa = self.cache.gpa;
        const prefixed_path = try self.cache.findPrefix(file_path);
        errdefer gpa.free(prefixed_path.sub_path);

        const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
        // Undo the insertion if anything below fails.
        errdefer _ = self.files.pop();

        if (gop.found_existing) {
            // Already tracked; just free the duplicate path allocation.
            gpa.free(prefixed_path.sub_path);
            return;
        }

        gop.key_ptr.* = .{
            .prefixed_path = prefixed_path,
            .max_file_size = null,
            .handle = null,
            .stat = undefined,
            .bin_digest = undefined,
            .contents = null,
        };

        // Guard against pointer invalidation while `gop.key_ptr` is held.
        self.files.lockPointers();
        defer self.files.unlockPointers();

        try self.populateFileHash(gop.key_ptr);
    }
1019
    /// Like `addFilePost` but when the file contents have already been loaded from disk.
    /// `bytes` is only hashed here, not retained; `stat` is stored as given,
    /// with the mtime/inode zeroed if the timestamp is too recent to be
    /// reliable.
    pub fn addFilePostContents(
        self: *Manifest,
        file_path: []const u8,
        bytes: []const u8,
        stat: File.Stat,
    ) !void {
        // Only valid after `hit` has opened the manifest file.
        assert(self.manifest_file != null);
        const gpa = self.cache.gpa;

        const prefixed_path = try self.cache.findPrefix(file_path);
        errdefer gpa.free(prefixed_path.sub_path);

        const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
        // Undo the insertion if anything below fails.
        errdefer _ = self.files.pop();

        if (gop.found_existing) {
            // Already tracked; just free the duplicate path allocation.
            gpa.free(prefixed_path.sub_path);
            return;
        }

        const new_file = gop.key_ptr;

        new_file.* = .{
            .prefixed_path = prefixed_path,
            .max_file_size = null,
            .handle = null,
            .stat = stat,
            .bin_digest = undefined,
            .contents = null,
        };

        if (try self.isProblematicTimestamp(new_file.stat.mtime)) {
            // The actual file has an unreliable timestamp, force it to be hashed
            new_file.stat.mtime = .zero;
            new_file.stat.inode = 0;
        }

        // Digest the provided contents, then fold that digest into the
        // manifest's overall hash.
        {
            var hasher = hasher_init;
            hasher.update(bytes);
            hasher.final(&new_file.bin_digest);
        }

        self.hash.hasher.update(&new_file.bin_digest);
    }
1066
1067 pub fn addDepFilePost(self: *Manifest, dir: fs.Dir, dep_file_sub_path: []const u8) !void {
1068 assert(self.manifest_file != null);
1069 return self.addDepFileMaybePost(dir, dep_file_sub_path);
1070 }
1071
    /// Reads the dependency file at `dep_file_sub_path` (Makefile ".d"
    /// format) and adds each prerequisite as an input file; targets are
    /// skipped. Before the initial hash (`manifest_file == null`) prereqs go
    /// through `addFile`, afterwards through `addFilePost`.
    fn addDepFileMaybePost(self: *Manifest, dir: fs.Dir, dep_file_sub_path: []const u8) !void {
        const gpa = self.cache.gpa;
        const dep_file_contents = try dir.readFileAlloc(dep_file_sub_path, gpa, .limited(manifest_file_size_max));
        defer gpa.free(dep_file_contents);

        // Scratch buffer for tokenizer error messages.
        var error_buf: std.ArrayList(u8) = .empty;
        defer error_buf.deinit(gpa);

        // Scratch buffer, reused across prereqs that need escape resolution.
        var resolve_buf: std.ArrayList(u8) = .empty;
        defer resolve_buf.deinit(gpa);

        var it: DepTokenizer = .{ .bytes = dep_file_contents };
        while (it.next()) |token| {
            switch (token) {
                // We don't care about targets, we only want the prereqs
                // Clang is invoked in single-source mode but other programs may not
                .target, .target_must_resolve => {},
                .prereq => |file_path| if (self.manifest_file == null) {
                    _ = try self.addFile(file_path, null);
                } else try self.addFilePost(file_path),
                .prereq_must_resolve => {
                    resolve_buf.clearRetainingCapacity();
                    try token.resolve(gpa, &resolve_buf);
                    if (self.manifest_file == null) {
                        _ = try self.addFile(resolve_buf.items, null);
                    } else try self.addFilePost(resolve_buf.items);
                },
                // Remaining token variants represent tokenizer errors.
                else => |err| {
                    try err.printError(gpa, &error_buf);
                    log.err("failed parsing {s}: {s}", .{ dep_file_sub_path, error_buf.items });
                    return error.InvalidDepFile;
                },
            }
        }
    }
1107
1108 /// Returns a binary hash of the inputs.
1109 pub fn finalBin(self: *Manifest) BinDigest {
1110 assert(self.manifest_file != null);
1111
1112 // We don't close the manifest file yet, because we want to
1113 // keep it locked until the API user is done using it.
1114 // We also don't write out the manifest yet, because until
1115 // cache_release is called we still might be working on creating
1116 // the artifacts to cache.
1117
1118 var bin_digest: BinDigest = undefined;
1119 self.hash.hasher.final(&bin_digest);
1120 return bin_digest;
1121 }
1122
1123 /// Returns a hex encoded hash of the inputs.
1124 pub fn final(self: *Manifest) HexDigest {
1125 const bin_digest = self.finalBin();
1126 return binToHex(bin_digest);
1127 }
1128
1129 /// If `want_shared_lock` is true, this function automatically downgrades the
1130 /// lock from exclusive to shared.
1131 pub fn writeManifest(self: *Manifest) !void {
1132 assert(self.have_exclusive_lock);
1133
1134 const manifest_file = self.manifest_file.?;
1135 if (self.manifest_dirty) {
1136 self.manifest_dirty = false;
1137
1138 var buffer: [4000]u8 = undefined;
1139 var fw = manifest_file.writer(&buffer);
1140 writeDirtyManifestToStream(self, &fw) catch |err| switch (err) {
1141 error.WriteFailed => return fw.err.?,
1142 else => |e| return e,
1143 };
1144 }
1145
1146 if (self.want_shared_lock) {
1147 try self.downgradeToSharedLock();
1148 }
1149 }
1150
1151 fn writeDirtyManifestToStream(self: *Manifest, fw: *fs.File.Writer) !void {
1152 try fw.interface.writeAll(manifest_header ++ "\n");
1153 for (self.files.keys()) |file| {
1154 try fw.interface.print("{d} {d} {d} {x} {d} {s}\n", .{
1155 file.stat.size,
1156 file.stat.inode,
1157 file.stat.mtime,
1158 &file.bin_digest,
1159 file.prefixed_path.prefix,
1160 file.prefixed_path.sub_path,
1161 });
1162 }
1163 try fw.end();
1164 }
1165
1166 fn downgradeToSharedLock(self: *Manifest) !void {
1167 if (!self.have_exclusive_lock) return;
1168
1169 // WASI does not currently support flock, so we bypass it here.
1170 // TODO: If/when flock is supported on WASI, this check should be removed.
1171 // See https://github.com/WebAssembly/wasi-filesystem/issues/2
1172 if (builtin.os.tag != .wasi or std.process.can_spawn or !builtin.single_threaded) {
1173 const manifest_file = self.manifest_file.?;
1174 try manifest_file.downgradeLock();
1175 }
1176
1177 self.have_exclusive_lock = false;
1178 }
1179
    /// Upgrades the shared lock on the manifest file to an exclusive one.
    /// Returns true if an upgrade actually happened — in which case other
    /// processes may have modified the manifest during the unlocked window —
    /// and false if the lock was already exclusive.
    fn upgradeToExclusiveLock(self: *Manifest) error{CacheCheckFailed}!bool {
        if (self.have_exclusive_lock) return false;
        assert(self.manifest_file != null);

        // WASI does not currently support flock, so we bypass it here.
        // TODO: If/when flock is supported on WASI, this check should be removed.
        // See https://github.com/WebAssembly/wasi-filesystem/issues/2
        if (builtin.os.tag != .wasi or std.process.can_spawn or !builtin.single_threaded) {
            const manifest_file = self.manifest_file.?;
            // Here we intentionally have a period where the lock is released, in case there are
            // other processes holding a shared lock.
            manifest_file.unlock();
            manifest_file.lock(.exclusive) catch |err| {
                // Record the underlying error for the caller's diagnostics.
                self.diagnostic = .{ .manifest_lock = err };
                return error.CacheCheckFailed;
            };
        }
        self.have_exclusive_lock = true;
        return true;
    }
1200
1201 /// Obtain only the data needed to maintain a lock on the manifest file.
1202 /// The `Manifest` remains safe to deinit.
1203 /// Don't forget to call `writeManifest` before this!
1204 pub fn toOwnedLock(self: *Manifest) Lock {
1205 const lock: Lock = .{
1206 .manifest_file = self.manifest_file.?,
1207 };
1208
1209 self.manifest_file = null;
1210 return lock;
1211 }
1212
1213 /// Releases the manifest file and frees any memory the Manifest was using.
1214 /// `Manifest.hit` must be called first.
1215 /// Don't forget to call `writeManifest` before this!
1216 pub fn deinit(self: *Manifest) void {
1217 if (self.manifest_file) |file| {
1218 if (builtin.os.tag == .windows) {
1219 // See Lock.release for why this is required on Windows
1220 file.unlock();
1221 }
1222
1223 file.close();
1224 }
1225 for (self.files.keys()) |*file| {
1226 file.deinit(self.cache.gpa);
1227 }
1228 self.files.deinit(self.cache.gpa);
1229 }
1230
    /// Serializes this manifest's input files into `buf` for the
    /// `std.zig.Server` protocol: each entry is a one-byte prefix index
    /// (shifted up by 1) followed by the sub path, with NUL bytes separating
    /// (not terminating) entries. Clears `buf` first.
    pub fn populateFileSystemInputs(man: *Manifest, buf: *std.ArrayList(u8)) Allocator.Error!void {
        // The protocol's prefix enum must stay in sync with the cache's prefixes.
        assert(@typeInfo(std.zig.Server.Message.PathPrefix).@"enum".fields.len == man.cache.prefixes_len);
        buf.clearRetainingCapacity();
        const gpa = man.cache.gpa;
        const files = man.files.keys();
        if (files.len > 0) {
            for (files) |file| {
                // +2: one byte for the prefix indicator, one for the NUL.
                try buf.ensureUnusedCapacity(gpa, file.prefixed_path.sub_path.len + 2);
                buf.appendAssumeCapacity(file.prefixed_path.prefix + 1);
                buf.appendSliceAssumeCapacity(file.prefixed_path.sub_path);
                buf.appendAssumeCapacity(0);
            }
            // The null byte is a separator, not a terminator.
            buf.items.len -= 1;
        }
    }
1247
    /// Copies this manifest's input files into `other`, remapping each file's
    /// prefix index through `prefix_map` and folding the copied digests into
    /// `other`'s hash. Paths already present in `other` are skipped.
    pub fn populateOtherManifest(man: *Manifest, other: *Manifest, prefix_map: [4]u8) Allocator.Error!void {
        const gpa = other.cache.gpa;
        assert(@typeInfo(std.zig.Server.Message.PathPrefix).@"enum".fields.len == man.cache.prefixes_len);
        assert(man.cache.prefixes_len == 4);
        for (man.files.keys()) |file| {
            const prefixed_path: PrefixedPath = .{
                .prefix = prefix_map[file.prefixed_path.prefix],
                .sub_path = try gpa.dupe(u8, file.prefixed_path.sub_path),
            };
            errdefer gpa.free(prefixed_path.sub_path);

            const gop = try other.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
            // Undo the insertion on error.
            errdefer _ = other.files.pop();

            if (gop.found_existing) {
                // Already present in `other`; drop the duplicate path copy.
                gpa.free(prefixed_path.sub_path);
                continue;
            }

            // Contents are deliberately not copied; only the digest matters
            // for `other`'s hash.
            gop.key_ptr.* = .{
                .prefixed_path = prefixed_path,
                .max_file_size = file.max_file_size,
                .handle = file.handle,
                .stat = file.stat,
                .bin_digest = file.bin_digest,
                .contents = null,
            };

            other.hash.hasher.update(&gop.key_ptr.bin_digest);
        }
    }
1279};
1280
/// Reads a small cached value. On operating systems with usable symlinks this
/// reads the link target; on Windows — where symlinks require elevated
/// privileges and are therefore treated as unsupported — it reads the file
/// contents instead.
pub fn readSmallFile(dir: fs.Dir, sub_path: []const u8, buffer: []u8) ![]u8 {
    return if (builtin.os.tag == .windows)
        dir.readFile(sub_path, buffer)
    else
        dir.readLink(sub_path, buffer);
}
1291
/// Writes a small cached value. On operating systems with usable symlinks the
/// value is stored as a symlink target; on Windows — where symlinks require
/// elevated privileges and are therefore treated as unsupported — it is
/// stored as file contents instead.
/// `data` must be a valid UTF-8 encoded file path and 255 bytes or fewer.
pub fn writeSmallFile(dir: fs.Dir, sub_path: []const u8, data: []const u8) !void {
    assert(data.len <= 255);
    if (builtin.os.tag != .windows) {
        return dir.symLink(data, sub_path, .{});
    }
    return dir.writeFile(.{ .sub_path = sub_path, .data = data });
}
1304
/// Hashes the entire contents of `file` (read via pread starting at offset 0)
/// into `bin_digest` without retaining the data.
fn hashFile(file: fs.File, bin_digest: *[Hasher.mac_length]u8) fs.File.PReadError!void {
    var hasher = hasher_init;
    var buffer: [1024]u8 = undefined;
    var offset: u64 = 0;
    while (true) {
        const amt = try file.pread(&buffer, offset);
        if (amt == 0) break;
        hasher.update(buffer[0..amt]);
        offset += amt;
    }
    hasher.final(bin_digest);
}
1317
/// Test helper: creates (truncating) a scratch file, stats it, and returns
/// the resulting mtime so tests can observe the filesystem's current
/// timestamp. The scratch file is removed on exit (best-effort).
fn testGetCurrentFileTimestamp(dir: fs.Dir) !Io.Timestamp {
    const scratch_name = "test-filetimestamp.tmp";

    var scratch = try dir.createFile(scratch_name, .{
        .read = true,
        .truncate = true,
    });
    defer {
        scratch.close();
        dir.deleteFile(scratch_name) catch {};
    }

    return (try scratch.stat()).mtime;
}
1333
1334test "cache file and then recall it" {
1335 const io = std.testing.io;
1336
1337 var tmp = testing.tmpDir(.{});
1338 defer tmp.cleanup();
1339
1340 const temp_file = "test.txt";
1341 const temp_manifest_dir = "temp_manifest_dir";
1342
1343 try tmp.dir.writeFile(.{ .sub_path = temp_file, .data = "Hello, world!\n" });
1344
1345 // Wait for file timestamps to tick
1346 const initial_time = try testGetCurrentFileTimestamp(tmp.dir);
1347 while ((try testGetCurrentFileTimestamp(tmp.dir)).nanoseconds == initial_time.nanoseconds) {
1348 try std.Io.Clock.Duration.sleep(.{ .clock = .boot, .raw = .fromNanoseconds(1) }, io);
1349 }
1350
1351 var digest1: HexDigest = undefined;
1352 var digest2: HexDigest = undefined;
1353
1354 {
1355 var cache: Cache = .{
1356 .io = io,
1357 .gpa = testing.allocator,
1358 .manifest_dir = try tmp.dir.makeOpenPath(temp_manifest_dir, .{}),
1359 };
1360 cache.addPrefix(.{ .path = null, .handle = tmp.dir });
1361 defer cache.manifest_dir.close();
1362
1363 {
1364 var ch = cache.obtain();
1365 defer ch.deinit();
1366
1367 ch.hash.add(true);
1368 ch.hash.add(@as(u16, 1234));
1369 ch.hash.addBytes("1234");
1370 _ = try ch.addFile(temp_file, null);
1371
1372 // There should be nothing in the cache
1373 try testing.expectEqual(false, try ch.hit());
1374
1375 digest1 = ch.final();
1376 try ch.writeManifest();
1377 }
1378 {
1379 var ch = cache.obtain();
1380 defer ch.deinit();
1381
1382 ch.hash.add(true);
1383 ch.hash.add(@as(u16, 1234));
1384 ch.hash.addBytes("1234");
1385 _ = try ch.addFile(temp_file, null);
1386
1387 // Cache hit! We just "built" the same file
1388 try testing.expect(try ch.hit());
1389 digest2 = ch.final();
1390
1391 try testing.expectEqual(false, ch.have_exclusive_lock);
1392 }
1393
1394 try testing.expectEqual(digest1, digest2);
1395 }
1396}
1397
1398test "check that changing a file makes cache fail" {
1399 const io = std.testing.io;
1400
1401 var tmp = testing.tmpDir(.{});
1402 defer tmp.cleanup();
1403
1404 const temp_file = "cache_hash_change_file_test.txt";
1405 const temp_manifest_dir = "cache_hash_change_file_manifest_dir";
1406 const original_temp_file_contents = "Hello, world!\n";
1407 const updated_temp_file_contents = "Hello, world; but updated!\n";
1408
1409 try tmp.dir.writeFile(.{ .sub_path = temp_file, .data = original_temp_file_contents });
1410
1411 // Wait for file timestamps to tick
1412 const initial_time = try testGetCurrentFileTimestamp(tmp.dir);
1413 while ((try testGetCurrentFileTimestamp(tmp.dir)).nanoseconds == initial_time.nanoseconds) {
1414 try std.Io.Clock.Duration.sleep(.{ .clock = .boot, .raw = .fromNanoseconds(1) }, io);
1415 }
1416
1417 var digest1: HexDigest = undefined;
1418 var digest2: HexDigest = undefined;
1419
1420 {
1421 var cache: Cache = .{
1422 .io = io,
1423 .gpa = testing.allocator,
1424 .manifest_dir = try tmp.dir.makeOpenPath(temp_manifest_dir, .{}),
1425 };
1426 cache.addPrefix(.{ .path = null, .handle = tmp.dir });
1427 defer cache.manifest_dir.close();
1428
1429 {
1430 var ch = cache.obtain();
1431 defer ch.deinit();
1432
1433 ch.hash.addBytes("1234");
1434 const temp_file_idx = try ch.addFile(temp_file, 100);
1435
1436 // There should be nothing in the cache
1437 try testing.expectEqual(false, try ch.hit());
1438
1439 try testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.keys()[temp_file_idx].contents.?));
1440
1441 digest1 = ch.final();
1442
1443 try ch.writeManifest();
1444 }
1445
1446 try tmp.dir.writeFile(.{ .sub_path = temp_file, .data = updated_temp_file_contents });
1447
1448 {
1449 var ch = cache.obtain();
1450 defer ch.deinit();
1451
1452 ch.hash.addBytes("1234");
1453 const temp_file_idx = try ch.addFile(temp_file, 100);
1454
1455 // A file that we depend on has been updated, so the cache should not contain an entry for it
1456 try testing.expectEqual(false, try ch.hit());
1457
1458 // The cache system does not keep the contents of re-hashed input files.
1459 try testing.expect(ch.files.keys()[temp_file_idx].contents == null);
1460
1461 digest2 = ch.final();
1462
1463 try ch.writeManifest();
1464 }
1465
1466 try testing.expect(!mem.eql(u8, digest1[0..], digest2[0..]));
1467 }
1468}
1469
1470test "no file inputs" {
1471 const io = testing.io;
1472
1473 var tmp = testing.tmpDir(.{});
1474 defer tmp.cleanup();
1475
1476 const temp_manifest_dir = "no_file_inputs_manifest_dir";
1477
1478 var digest1: HexDigest = undefined;
1479 var digest2: HexDigest = undefined;
1480
1481 var cache: Cache = .{
1482 .io = io,
1483 .gpa = testing.allocator,
1484 .manifest_dir = try tmp.dir.makeOpenPath(temp_manifest_dir, .{}),
1485 };
1486 cache.addPrefix(.{ .path = null, .handle = tmp.dir });
1487 defer cache.manifest_dir.close();
1488
1489 {
1490 var man = cache.obtain();
1491 defer man.deinit();
1492
1493 man.hash.addBytes("1234");
1494
1495 // There should be nothing in the cache
1496 try testing.expectEqual(false, try man.hit());
1497
1498 digest1 = man.final();
1499
1500 try man.writeManifest();
1501 }
1502 {
1503 var man = cache.obtain();
1504 defer man.deinit();
1505
1506 man.hash.addBytes("1234");
1507
1508 try testing.expect(try man.hit());
1509 digest2 = man.final();
1510 try testing.expectEqual(false, man.have_exclusive_lock);
1511 }
1512
1513 try testing.expectEqual(digest1, digest2);
1514}
1515
1516test "Manifest with files added after initial hash work" {
1517 const io = std.testing.io;
1518
1519 var tmp = testing.tmpDir(.{});
1520 defer tmp.cleanup();
1521
1522 const temp_file1 = "cache_hash_post_file_test1.txt";
1523 const temp_file2 = "cache_hash_post_file_test2.txt";
1524 const temp_manifest_dir = "cache_hash_post_file_manifest_dir";
1525
1526 try tmp.dir.writeFile(.{ .sub_path = temp_file1, .data = "Hello, world!\n" });
1527 try tmp.dir.writeFile(.{ .sub_path = temp_file2, .data = "Hello world the second!\n" });
1528
1529 // Wait for file timestamps to tick
1530 const initial_time = try testGetCurrentFileTimestamp(tmp.dir);
1531 while ((try testGetCurrentFileTimestamp(tmp.dir)).nanoseconds == initial_time.nanoseconds) {
1532 try std.Io.Clock.Duration.sleep(.{ .clock = .boot, .raw = .fromNanoseconds(1) }, io);
1533 }
1534
1535 var digest1: HexDigest = undefined;
1536 var digest2: HexDigest = undefined;
1537 var digest3: HexDigest = undefined;
1538
1539 {
1540 var cache: Cache = .{
1541 .io = io,
1542 .gpa = testing.allocator,
1543 .manifest_dir = try tmp.dir.makeOpenPath(temp_manifest_dir, .{}),
1544 };
1545 cache.addPrefix(.{ .path = null, .handle = tmp.dir });
1546 defer cache.manifest_dir.close();
1547
1548 {
1549 var ch = cache.obtain();
1550 defer ch.deinit();
1551
1552 ch.hash.addBytes("1234");
1553 _ = try ch.addFile(temp_file1, null);
1554
1555 // There should be nothing in the cache
1556 try testing.expectEqual(false, try ch.hit());
1557
1558 _ = try ch.addFilePost(temp_file2);
1559
1560 digest1 = ch.final();
1561 try ch.writeManifest();
1562 }
1563 {
1564 var ch = cache.obtain();
1565 defer ch.deinit();
1566
1567 ch.hash.addBytes("1234");
1568 _ = try ch.addFile(temp_file1, null);
1569
1570 try testing.expect(try ch.hit());
1571 digest2 = ch.final();
1572
1573 try testing.expectEqual(false, ch.have_exclusive_lock);
1574 }
1575 try testing.expect(mem.eql(u8, &digest1, &digest2));
1576
1577 // Modify the file added after initial hash
1578 try tmp.dir.writeFile(.{ .sub_path = temp_file2, .data = "Hello world the second, updated\n" });
1579
1580 // Wait for file timestamps to tick
1581 const initial_time2 = try testGetCurrentFileTimestamp(tmp.dir);
1582 while ((try testGetCurrentFileTimestamp(tmp.dir)).nanoseconds == initial_time2.nanoseconds) {
1583 try std.Io.Clock.Duration.sleep(.{ .clock = .boot, .raw = .fromNanoseconds(1) }, io);
1584 }
1585
1586 {
1587 var ch = cache.obtain();
1588 defer ch.deinit();
1589
1590 ch.hash.addBytes("1234");
1591 _ = try ch.addFile(temp_file1, null);
1592
1593 // A file that we depend on has been updated, so the cache should not contain an entry for it
1594 try testing.expectEqual(false, try ch.hit());
1595
1596 _ = try ch.addFilePost(temp_file2);
1597
1598 digest3 = ch.final();
1599
1600 try ch.writeManifest();
1601 }
1602
1603 try testing.expect(!mem.eql(u8, &digest1, &digest3));
1604 }
1605}