Commit a57b0a0f2f

Andrew Kelley <andrew@ziglang.org>
2025-02-24 04:20:38
fix generated hash of by-path dependencies
This branch regressed from master by switching to binary rather than hex digest, allowing null bytes to end up in identifiers in the zig file. This commit fixes it by changing the "hash" to be literally equal to the sub_path (with a prefix '/' to indicate "global") if it can fit. If it is too long then it is actually hashed, and that value is used instead.
1 parent e03bc7a
Changed files (2)
src/Package/Fetch.zig
@@ -775,16 +775,7 @@ fn queueJobsForDeps(f: *Fetch) RunError!void {
 }
 
 pub fn relativePathDigest(pkg_root: Cache.Path, cache_root: Cache.Directory) Package.Hash {
-    var hasher = Package.Hash.Algo.init(.{});
-    // This hash is a tuple of:
-    // * whether it relative to the global cache directory or to the root package
-    // * the relative file path from there to the build root of the package
-    hasher.update(if (pkg_root.root_dir.eql(cache_root))
-        &package_hash_prefix_cached
-    else
-        &package_hash_prefix_project);
-    hasher.update(pkg_root.sub_path);
-    return .fromSlice(&hasher.finalResult());
+    return .initPath(pkg_root.sub_path, pkg_root.root_dir.eql(cache_root)); // second argument is `is_global`: true when the package root directory is the cache root
 }
 
 pub fn workerRun(f: *Fetch, prog_name: []const u8) void {
@@ -1793,10 +1784,6 @@ pub fn depDigest(pkg_root: Cache.Path, cache_root: Cache.Directory, dep: Manifes
     }
 }
 
-// These are random bytes.
-const package_hash_prefix_cached = [8]u8{ 0x53, 0x7e, 0xfa, 0x94, 0x65, 0xe9, 0xf8, 0x73 };
-const package_hash_prefix_project = [8]u8{ 0xe1, 0x25, 0xee, 0xfa, 0xa6, 0x17, 0x38, 0xcc };
-
 const builtin = @import("builtin");
 const std = @import("std");
 const fs = std.fs;
src/Package.zig
@@ -15,6 +15,9 @@ pub const MultiHashHexDigest = [multihash_hex_digest_len]u8;
 ///
 /// This data structure can be used to store the legacy hash format too. Legacy
 /// hash format is scheduled to be removed after 0.14.0 is tagged.
+///
+/// There's also a third way this structure is used. When using path rather than
+/// hash, a unique hash is still needed, so one is computed based on the path.
 pub const Hash = struct {
     /// Maximum size of a package hash. Unused bytes at the end are
     /// filled with zeroes.
@@ -100,6 +103,25 @@ pub const Hash = struct {
         _ = std.base64.url_safe_no_pad.Encoder.encode(&name, digest[5..][0..24]);
         return init(digest, &name, "N", size);
     }
+
+    /// Produces a unique hash based on the path provided. The result should
+    /// not be user-visible.
+    pub fn initPath(sub_path: []const u8, is_global: bool) Hash {
+        var result: Hash = .{ .bytes = @splat(0) }; // start all-zero so any bytes not written below stay zero
+        var i: usize = 0; // offset into result.bytes where the path (or its hex digest) begins
+        if (is_global) {
+            result.bytes[0] = '/'; // a leading '/' marks the path as "global"
+            i += 1;
+        }
+        if (i + sub_path.len <= result.bytes.len) { // the path fits after the optional '/': store it verbatim
+            @memcpy(result.bytes[i..][0..sub_path.len], sub_path);
+            return result;
+        }
+        var bin_digest: [Algo.digest_length]u8 = undefined; // path too long: fall back to hashing it
+        Algo.hash(sub_path, &bin_digest, .{}); // only sub_path is hashed; is_global is conveyed solely by the '/' prefix
+        _ = std.fmt.bufPrint(result.bytes[i..], "{}", .{std.fmt.fmtSliceHexLower(&bin_digest)}) catch unreachable; // lowercase hex, so no null bytes; NOTE(review): `catch unreachable` assumes the hex digest always fits in the remaining bytes - confirm against max_len
+        return result;
+    }
 };
 
 pub const MultihashFunction = enum(u16) {