master
/// Calculates Md5 hash of each chunk in parallel and then hashes all Md5 hashes to produce
/// the final digest, which is written to `out` after being passed through `conform`.
/// While this is NOT a correct MD5 hash of the contents, this methodology is used by LLVM/LLD
/// and we will use it too as it seems accepted by Apple OSes.
///
/// The input is split into 1 MiB chunks; one extra chunk is accounted for when `file_size`
/// is not a multiple of the chunk size. `file_size` is forwarded to the hasher as
/// `max_file_size`.
///
/// Returns `error.Overflow` when the chunk count does not fit in `usize`, and propagates
/// any allocation or hasher error.
///
/// TODO LLD also hashes the output filename to disambiguate between same builds with different
/// output files. Should we also do that?
pub fn calcUuid(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
    const tracy = trace(@src());
    defer tracy.end();

    const chunk_size: usize = 1024 * 1024;
    const num_chunks: usize = std.math.cast(usize, @divTrunc(file_size, chunk_size)) orelse return error.Overflow;
    // A trailing partial chunk still gets its own digest slot.
    const actual_num_chunks = if (@rem(file_size, chunk_size) > 0) num_chunks + 1 else num_chunks;

    const hashes = try comp.gpa.alloc([Md5.digest_length]u8, actual_num_chunks);
    defer comp.gpa.free(hashes);

    var hasher = Hasher(Md5){ .allocator = comp.gpa, .thread_pool = comp.thread_pool };
    try hasher.hash(file, hashes, .{
        .chunk_size = chunk_size,
        .max_file_size = file_size,
    });

    // `hashes` is a contiguous slice of fixed-size byte arrays, so viewing it as
    // bytes yields the per-chunk digests laid out back to back. Hashing that view
    // directly avoids allocating and filling a second buffer with the same bytes.
    Md5.hash(std.mem.sliceAsBytes(hashes), out, .{});
    conform(out);
}
34
/// Overwrites the UUID version and variant bits of `out` in place.
/// Only bytes 6 and 8 are touched; every other byte of the digest is left as is.
inline fn conform(out: *[Md5.digest_length]u8) void {
    // LC_UUID uuids are expected to follow the RFC 4122 UUID layout.
    const version_bits: u8 = 3 << 4; // high nibble of byte 6 becomes 3
    const variant_bits: u8 = 0x80; // top two bits of byte 8 become 0b10

    const low_nibble = out[6] & 0x0F;
    const low_six_bits = out[8] & 0x3F;

    out[6] = version_bits | low_nibble;
    out[8] = variant_bits | low_six_bits;
}
40
41const fs = std.fs;
42const mem = std.mem;
43const std = @import("std");
44const trace = @import("../../tracy.zig").trace;
45
46const Compilation = @import("../../Compilation.zig");
47const Md5 = std.crypto.hash.Md5;
48const Hasher = @import("hasher.zig").ParallelHasher;
49const ThreadPool = std.Thread.Pool;