master
  1const std = @import("std");
  2const assert = std.debug.assert;
  3
  4pub const Module = @import("Package/Module.zig");
  5pub const Fetch = @import("Package/Fetch.zig");
  6pub const build_zig_basename = "build.zig";
  7pub const Manifest = @import("Package/Manifest.zig");
  8
/// Size in bytes of a binary multihash as produced for the legacy format:
/// one byte for the hash function code, one byte for the digest length,
/// followed by the digest itself.
pub const multihash_len = 1 + 1 + Hash.Algo.digest_length;
/// Number of characters in a hex-encoded multihash (two per byte).
pub const multihash_hex_digest_len = 2 * multihash_len;
/// Fixed-size buffer that holds exactly one hex-encoded multihash.
pub const MultiHashHexDigest = [multihash_hex_digest_len]u8;
 12
 13pub const Fingerprint = packed struct(u64) {
 14    id: u32,
 15    checksum: u32,
 16
 17    pub fn generate(name: []const u8) Fingerprint {
 18        return .{
 19            .id = std.crypto.random.intRangeLessThan(u32, 1, 0xffffffff),
 20            .checksum = std.hash.Crc32.hash(name),
 21        };
 22    }
 23
 24    pub fn validate(n: Fingerprint, name: []const u8) bool {
 25        switch (n.id) {
 26            0x00000000, 0xffffffff => return false,
 27            else => return std.hash.Crc32.hash(name) == n.checksum,
 28        }
 29    }
 30
 31    pub fn int(n: Fingerprint) u64 {
 32        return @bitCast(n);
 33    }
 34};
 35
 36/// A user-readable, file system safe hash that identifies an exact package
 37/// snapshot, including file contents.
 38///
 39/// The hash is not only to prevent collisions but must resist attacks where
 40/// the adversary fully controls the contents being hashed. Thus, it contains
 41/// a full SHA-256 digest.
 42///
 43/// This data structure can be used to store the legacy hash format too. Legacy
 44/// hash format is scheduled to be removed after 0.14.0 is tagged.
 45///
 46/// There's also a third way this structure is used. When using path rather than
 47/// hash, a unique hash is still needed, so one is computed based on the path.
 48pub const Hash = struct {
 49    /// Maximum size of a package hash. Unused bytes at the end are
 50    /// filled with zeroes.
 51    bytes: [max_len]u8,
 52
 53    pub const Algo = std.crypto.hash.sha2.Sha256;
 54    pub const Digest = [Algo.digest_length]u8;
 55
 56    /// Example: "nnnn-vvvv-hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh"
 57    pub const max_len = 32 + 1 + 32 + 1 + (32 + 32 + 200) / 6;
 58
 59    pub fn fromSlice(s: []const u8) Hash {
 60        assert(s.len <= max_len);
 61        var result: Hash = undefined;
 62        @memcpy(result.bytes[0..s.len], s);
 63        @memset(result.bytes[s.len..], 0);
 64        return result;
 65    }
 66
 67    pub fn toSlice(ph: *const Hash) []const u8 {
 68        var end: usize = ph.bytes.len;
 69        while (end > 0) {
 70            end -= 1;
 71            if (ph.bytes[end] != 0) return ph.bytes[0 .. end + 1];
 72        }
 73        return ph.bytes[0..0];
 74    }
 75
 76    pub fn eql(a: *const Hash, b: *const Hash) bool {
 77        return std.mem.eql(u8, &a.bytes, &b.bytes);
 78    }
 79
 80    /// Distinguishes whether the legacy multihash format is being stored here.
 81    pub fn isOld(h: *const Hash) bool {
 82        if (h.bytes.len < 2) return false;
 83        const their_multihash_func = std.fmt.parseInt(u8, h.bytes[0..2], 16) catch return false;
 84        if (@as(MultihashFunction, @enumFromInt(their_multihash_func)) != multihash_function) return false;
 85        if (h.toSlice().len != multihash_hex_digest_len) return false;
 86        return std.mem.indexOfScalar(u8, &h.bytes, '-') == null;
 87    }
 88
 89    test isOld {
 90        const h: Hash = .fromSlice("1220138f4aba0c01e66b68ed9e1e1e74614c06e4743d88bc58af4f1c3dd0aae5fea7");
 91        try std.testing.expect(h.isOld());
 92    }
 93
 94    /// Produces "$name-$semver-$hashplus".
 95    /// * name is the name field from build.zig.zon, asserted to be at most 32
 96    ///   bytes and assumed be a valid zig identifier
 97    /// * semver is the version field from build.zig.zon, asserted to be at
 98    ///   most 32 bytes
 99    /// * hashplus is the following 33-byte array, base64 encoded using -_ to make
100    ///   it filesystem safe:
101    ///   - (4 bytes) LE u32 Package ID
102    ///   - (4 bytes) LE u32 total decompressed size in bytes, overflow saturated
103    ///   - (25 bytes) truncated SHA-256 digest of hashed files of the package
104    pub fn init(digest: Digest, name: []const u8, ver: []const u8, id: u32, size: u32) Hash {
105        assert(name.len <= 32);
106        assert(ver.len <= 32);
107        var result: Hash = undefined;
108        var buf: std.ArrayList(u8) = .initBuffer(&result.bytes);
109        buf.appendSliceAssumeCapacity(name);
110        buf.appendAssumeCapacity('-');
111        buf.appendSliceAssumeCapacity(ver);
112        buf.appendAssumeCapacity('-');
113        var hashplus: [33]u8 = undefined;
114        std.mem.writeInt(u32, hashplus[0..4], id, .little);
115        std.mem.writeInt(u32, hashplus[4..8], size, .little);
116        hashplus[8..].* = digest[0..25].*;
117        _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(44), &hashplus);
118        @memset(buf.unusedCapacitySlice(), 0);
119        return result;
120    }
121
122    /// Produces a unique hash based on the path provided. The result should
123    /// not be user-visible.
124    pub fn initPath(sub_path: []const u8, is_global: bool) Hash {
125        var result: Hash = .{ .bytes = @splat(0) };
126        var i: usize = 0;
127        if (is_global) {
128            result.bytes[0] = '/';
129            i += 1;
130        }
131        if (i + sub_path.len <= result.bytes.len) {
132            @memcpy(result.bytes[i..][0..sub_path.len], sub_path);
133            return result;
134        }
135        var bin_digest: [Algo.digest_length]u8 = undefined;
136        Algo.hash(sub_path, &bin_digest, .{});
137        _ = std.fmt.bufPrint(result.bytes[i..], "{x}", .{&bin_digest}) catch unreachable;
138        return result;
139    }
140};
141
/// Hash function codes that can appear as the leading field of a multihash.
///
/// The enum is non-exhaustive (`_`), so a code that is not listed here can
/// still be represented after `@enumFromInt`.
/// NOTE(review): the numeric values presumably mirror the multiformats
/// multihash code table — confirm against that table before adding entries.
pub const MultihashFunction = enum(u16) {
    identity = 0x00,
    sha1 = 0x11,
    @"sha2-256" = 0x12,
    @"sha2-512" = 0x13,
    @"sha3-512" = 0x14,
    @"sha3-384" = 0x15,
    @"sha3-256" = 0x16,
    @"sha3-224" = 0x17,
    @"sha2-384" = 0x20,
    @"sha2-256-trunc254-padded" = 0x1012,
    @"sha2-224" = 0x1013,
    @"sha2-512-224" = 0x1014,
    @"sha2-512-256" = 0x1015,
    @"blake2b-256" = 0xb220,
    _,
};
159
/// The multihash function code that corresponds to `Hash.Algo`.
///
/// Selected by switching on the algorithm type itself; only SHA-256 is
/// mapped, and any other `Hash.Algo` reaches `unreachable`.
pub const multihash_function: MultihashFunction = switch (Hash.Algo) {
    std.crypto.hash.sha2.Sha256 => .@"sha2-256",
    else => unreachable,
};
164
/// Hex-encodes `digest` in the legacy multihash layout.
///
/// The output is lowercase hex of: one byte holding the `multihash_function`
/// code, one byte holding `Hash.Algo.digest_length`, then the digest bytes.
pub fn multiHashHexDigest(digest: Hash.Digest) MultiHashHexDigest {
    // Build the binary multihash first, then hex-encode it in one step.
    var multihash: [multihash_len]u8 = undefined;
    multihash[0] = @intFromEnum(multihash_function);
    multihash[1] = Hash.Algo.digest_length;
    multihash[2..].* = digest;
    return std.fmt.bytesToHex(multihash, .lower);
}
182
comptime {
    // We avoid unnecessary uleb128 code in multiHashHexDigest by asserting
    // here that the values are small enough to be contained in the one-byte
    // encoding. That function writes the function code and the digest length
    // as exactly one byte (two hex characters) each.
    assert(@intFromEnum(multihash_function) < 127);
    assert(Hash.Algo.digest_length < 127);
}
189
// Pins the exact text form that `Hash.init` produces for a known digest,
// name, version, id and size.
test Hash {
    const example_digest: Hash.Digest = .{
        0xc7, 0xf5, 0x71, 0xb7, 0xb4, 0xe7, 0x6f, 0x3c, 0xdb, 0x87, 0x7a, 0x7f, 0xdd, 0xf9, 0x77, 0x87,
        0x9d, 0xd3, 0x86, 0xfa, 0x73, 0x57, 0x9a, 0xf7, 0x9d, 0x1e, 0xdb, 0x8f, 0x3a, 0xd9, 0xbd, 0x9f,
    };
    const expected = "nasm-2.16.1-3-vrr-ygAAoADH9XG3tOdvPNuHen_d-XeHndOG-nNXmved";
    const actual = Hash.init(example_digest, "nasm", "2.16.1-3", 0xcafebabe, 10 * 1024 * 1024);
    try std.testing.expectEqualStrings(expected, actual.toSlice());
}
198
// An empty input is stored as all zero bytes and reads back as an empty slice.
test "empty hash" {
    const empty: Hash = .fromSlice("");
    try std.testing.expectEqualStrings("", empty.toSlice());
}
203
// Reference `Fetch` so that the declarations in Package/Fetch.zig are
// analyzed when this file's tests are built.
test {
    _ = Fetch;
}