master
1const std = @import("std");
2const assert = std.debug.assert;
3
4pub const Module = @import("Package/Module.zig");
5pub const Fetch = @import("Package/Fetch.zig");
6pub const build_zig_basename = "build.zig";
7pub const Manifest = @import("Package/Manifest.zig");
8
/// Byte length of the legacy multihash: two one-byte header fields (hash
/// function code and digest length) followed by the digest itself.
pub const multihash_len = 2 + Hash.Algo.digest_length;
/// Length of the legacy multihash once hex encoded (two characters per byte).
pub const multihash_hex_digest_len = multihash_len * 2;
/// Fixed-size buffer holding a hex-encoded legacy multihash.
pub const MultiHashHexDigest = [multihash_hex_digest_len]u8;
12
/// 64-bit package fingerprint: a random `id` in the low 32 bits paired with
/// a CRC-32 of the package name in the high 32 bits.
pub const Fingerprint = packed struct(u64) {
    id: u32,
    checksum: u32,

    /// Creates a fingerprint for `name` with a freshly drawn random `id`.
    /// The range passed to the RNG excludes both reserved values
    /// (0x00000000 and 0xffffffff) that `validate` rejects.
    pub fn generate(name: []const u8) Fingerprint {
        const random_id = std.crypto.random.intRangeLessThan(u32, 1, 0xffffffff);
        const name_crc = std.hash.Crc32.hash(name);
        return .{ .id = random_id, .checksum = name_crc };
    }

    /// Returns `false` when `id` is one of the reserved values; otherwise
    /// reports whether `checksum` equals the CRC-32 of `name`.
    pub fn validate(n: Fingerprint, name: []const u8) bool {
        if (n.id == 0x00000000 or n.id == 0xffffffff) return false;
        return n.checksum == std.hash.Crc32.hash(name);
    }

    /// Reinterprets the fingerprint as its backing `u64`.
    pub fn int(n: Fingerprint) u64 {
        return @bitCast(n);
    }
};
35
/// A user-readable, file system safe hash that identifies an exact package
/// snapshot, including file contents.
///
/// The hash is not only to prevent collisions but must resist attacks where
/// the adversary fully controls the contents being hashed. Thus, it is based
/// on a SHA-256 digest (`init` stores the first 25 bytes of it).
///
/// This data structure can be used to store the legacy hash format too. Legacy
/// hash format is scheduled to be removed after 0.14.0 is tagged.
///
/// There's also a third way this structure is used. When using path rather than
/// hash, a unique hash is still needed, so one is computed based on the path.
pub const Hash = struct {
    /// Storage sized for the maximum package hash length. Unused bytes at
    /// the end are filled with zeroes.
    bytes: [max_len]u8,

    pub const Algo = std.crypto.hash.sha2.Sha256;
    pub const Digest = [Algo.digest_length]u8;

    /// Example: "nnnn-vvvv-hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh"
    ///
    /// Up to 32 bytes of name, '-', up to 32 bytes of version, '-', then the
    /// base64 text of a (32 + 32 + 200)-bit payload at 6 bits per character.
    pub const max_len = 32 + 1 + 32 + 1 + (32 + 32 + 200) / 6;

    /// Copies `s` into a new `Hash` and zero-fills the remainder.
    /// Asserts that `s` is at most `max_len` bytes long.
    pub fn fromSlice(s: []const u8) Hash {
        assert(s.len <= max_len);
        var result: Hash = undefined;
        @memcpy(result.bytes[0..s.len], s);
        @memset(result.bytes[s.len..], 0);
        return result;
    }

    /// Returns the stored bytes with the trailing zero padding removed.
    /// The returned slice points into `ph`.
    pub fn toSlice(ph: *const Hash) []const u8 {
        var end: usize = ph.bytes.len;
        while (end > 0) {
            end -= 1;
            if (ph.bytes[end] != 0) return ph.bytes[0 .. end + 1];
        }
        return ph.bytes[0..0];
    }

    /// Compares the full backing arrays, padding included.
    pub fn eql(a: *const Hash, b: *const Hash) bool {
        return std.mem.eql(u8, &a.bytes, &b.bytes);
    }

    /// Distinguishes whether the legacy multihash format is being stored here.
    ///
    /// The value is considered legacy when its first two characters parse as
    /// the hex code of `multihash_function`, its unpadded length equals
    /// `multihash_hex_digest_len`, and it contains no '-' separator.
    pub fn isOld(h: *const Hash) bool {
        // `bytes` is a fixed-size array of `max_len` bytes, so the two-byte
        // prefix is always in bounds and needs no runtime length check.
        const their_multihash_func = std.fmt.parseInt(u8, h.bytes[0..2], 16) catch return false;
        if (@as(MultihashFunction, @enumFromInt(their_multihash_func)) != multihash_function) return false;
        if (h.toSlice().len != multihash_hex_digest_len) return false;
        return std.mem.indexOfScalar(u8, &h.bytes, '-') == null;
    }

    test isOld {
        const h: Hash = .fromSlice("1220138f4aba0c01e66b68ed9e1e1e74614c06e4743d88bc58af4f1c3dd0aae5fea7");
        try std.testing.expect(h.isOld());
    }

    /// Produces "$name-$semver-$hashplus".
    /// * name is the name field from build.zig.zon, asserted to be at most 32
    ///   bytes and assumed to be a valid zig identifier
    /// * semver is the version field from build.zig.zon, asserted to be at
    ///   most 32 bytes
    /// * hashplus is the following 33-byte array, base64 encoded using -_ to make
    ///   it filesystem safe:
    ///   - (4 bytes) LE u32 Package ID
    ///   - (4 bytes) LE u32 total decompressed size in bytes, overflow saturated
    ///   - (25 bytes) truncated SHA-256 digest of hashed files of the package
    pub fn init(digest: Digest, name: []const u8, ver: []const u8, id: u32, size: u32) Hash {
        assert(name.len <= 32);
        assert(ver.len <= 32);
        var result: Hash = undefined;
        // Build the text directly inside `result.bytes`; the asserts above
        // keep every append within `max_len`.
        var buf: std.ArrayList(u8) = .initBuffer(&result.bytes);
        buf.appendSliceAssumeCapacity(name);
        buf.appendAssumeCapacity('-');
        buf.appendSliceAssumeCapacity(ver);
        buf.appendAssumeCapacity('-');
        var hashplus: [33]u8 = undefined;
        std.mem.writeInt(u32, hashplus[0..4], id, .little);
        std.mem.writeInt(u32, hashplus[4..8], size, .little);
        hashplus[8..].* = digest[0..25].*;
        // 33 bytes encode to exactly 44 unpadded base64 characters.
        _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(44), &hashplus);
        // Zero the tail so `toSlice` and `eql` see deterministic padding.
        @memset(buf.unusedCapacitySlice(), 0);
        return result;
    }

    /// Produces a unique hash based on the path provided. The result should
    /// not be user-visible.
    ///
    /// When `is_global` is set the value starts with '/'. If the path fits in
    /// the remaining space it is stored verbatim; otherwise the hex-formatted
    /// `Algo` digest of `sub_path` is stored in its place.
    pub fn initPath(sub_path: []const u8, is_global: bool) Hash {
        var result: Hash = .{ .bytes = @splat(0) };
        var i: usize = 0;
        if (is_global) {
            result.bytes[0] = '/';
            i += 1;
        }
        if (i + sub_path.len <= result.bytes.len) {
            @memcpy(result.bytes[i..][0..sub_path.len], sub_path);
            return result;
        }
        var bin_digest: [Algo.digest_length]u8 = undefined;
        Algo.hash(sub_path, &bin_digest, .{});
        // Two hex characters per digest byte always fit in the space left
        // after the optional prefix, so the print cannot run out of room.
        _ = std.fmt.bufPrint(result.bytes[i..], "{x}", .{&bin_digest}) catch unreachable;
        return result;
    }
};
141
/// Hash function codes used as the leading field of a multihash.
/// Non-exhaustive: any other `u16` value is representable but unnamed.
/// NOTE(review): names and values appear to follow the multiformats
/// multicodec table — verify against it before adding entries.
pub const MultihashFunction = enum(u16) {
    // Codes below 0x80.
    identity = 0x00,
    sha1 = 0x11,
    @"sha2-256" = 0x12,
    @"sha2-512" = 0x13,
    @"sha3-512" = 0x14,
    @"sha3-384" = 0x15,
    @"sha3-256" = 0x16,
    @"sha3-224" = 0x17,
    @"sha2-384" = 0x20,

    // Codes that need more than seven bits.
    @"sha2-256-trunc254-padded" = 0x1012,
    @"sha2-224" = 0x1013,
    @"sha2-512-224" = 0x1014,
    @"sha2-512-256" = 0x1015,
    @"blake2b-256" = 0xb220,

    _,
};
159
/// The multihash function code corresponding to `Hash.Algo`. Only SHA-256 is
/// mapped; any other algorithm hits `unreachable` during compilation.
pub const multihash_function: MultihashFunction = if (Hash.Algo == std.crypto.hash.sha2.Sha256)
    .@"sha2-256"
else
    unreachable;
164
/// Hex-encodes `digest` in the legacy multihash layout: two hex characters
/// for the hash function code, two for the digest length, then two per
/// digest byte, all taken from `std.fmt.hex_charset`.
pub fn multiHashHexDigest(digest: Hash.Digest) MultiHashHexDigest {
    const charset = std.fmt.hex_charset;
    const func_code = @intFromEnum(multihash_function);
    const digest_len = Hash.Algo.digest_length;

    var out: MultiHashHexDigest = undefined;

    // Four-character header: function code followed by digest length.
    out[0..4].* = .{
        charset[func_code >> 4],
        charset[func_code & 15],
        charset[digest_len >> 4],
        charset[digest_len & 15],
    };

    // Each digest byte becomes a high-nibble / low-nibble character pair.
    for (digest, 0..) |byte, i| {
        const pair = out[4 + 2 * i ..][0..2];
        pair[0] = charset[byte >> 4];
        pair[1] = charset[byte & 15];
    }
    return out;
}
182
comptime {
    // multiHashHexDigest writes the function code and the digest length as
    // one byte each instead of running real uleb128 encoding, so both values
    // must be small enough for the one-byte form.
    const one_byte_limit = 127;
    assert(@intFromEnum(multihash_function) < one_byte_limit);
    assert(Hash.Algo.digest_length < one_byte_limit);
}
189
test Hash {
    // Full 32-byte digest; `Hash.init` encodes only its first 25 bytes.
    const digest: Hash.Digest = .{
        0xc7, 0xf5, 0x71, 0xb7, 0xb4, 0xe7, 0x6f, 0x3c, 0xdb, 0x87, 0x7a, 0x7f, 0xdd, 0xf9, 0x77, 0x87,
        0x9d, 0xd3, 0x86, 0xfa, 0x73, 0x57, 0x9a, 0xf7, 0x9d, 0x1e, 0xdb, 0x8f, 0x3a, 0xd9, 0xbd, 0x9f,
    };
    const hash = Hash.init(digest, "nasm", "2.16.1-3", 0xcafebabe, 10 * 1024 * 1024);
    try std.testing.expectEqualStrings(
        "nasm-2.16.1-3-vrr-ygAAoADH9XG3tOdvPNuHen_d-XeHndOG-nNXmved",
        hash.toSlice(),
    );
}
198
test "empty hash" {
    // An all-zero backing array must round-trip to the empty string.
    const empty: Hash = .fromSlice("");
    try std.testing.expectEqualStrings("", empty.toSlice());
}
203
test {
    // Reference Fetch from a test so that Package/Fetch.zig is analyzed in
    // test builds.
    _ = Fetch;
}