Commit 79457fc76a
Changed files (7)
src/link/MachO/CodeSignature.zig
@@ -1,6 +1,4 @@
const CodeSignature = @This();
-const Compilation = @import("../../Compilation.zig");
-const WaitGroup = @import("../../WaitGroup.zig");
const std = @import("std");
const assert = std.debug.assert;
@@ -9,10 +7,13 @@ const log = std.log.scoped(.link);
const macho = std.macho;
const mem = std.mem;
const testing = std.testing;
+
const Allocator = mem.Allocator;
+const Compilation = @import("../../Compilation.zig");
+const Hasher = @import("hasher.zig").ParallelHasher;
const Sha256 = std.crypto.hash.sha2.Sha256;
-const hash_size: u8 = 32;
+const hash_size = Sha256.digest_length;
const Blob = union(enum) {
code_directory: *CodeDirectory,
@@ -109,7 +110,7 @@ const CodeDirectory = struct {
fn size(self: CodeDirectory) u32 {
const code_slots = self.inner.nCodeSlots * hash_size;
const special_slots = self.inner.nSpecialSlots * hash_size;
- return @sizeOf(macho.CodeDirectory) + @intCast(u32, self.ident.len + 1) + special_slots + code_slots;
+ return @sizeOf(macho.CodeDirectory) + @intCast(u32, self.ident.len + 1 + special_slots + code_slots);
}
fn write(self: CodeDirectory, writer: anytype) !void {
@@ -287,33 +288,11 @@ pub fn writeAdhocSignature(
self.code_directory.inner.nCodeSlots = total_pages;
// Calculate hash for each page (in file) and write it to the buffer
- var wg: WaitGroup = .{};
- {
- const buffer = try gpa.alloc(u8, self.page_size * total_pages);
- defer gpa.free(buffer);
-
- const results = try gpa.alloc(fs.File.PReadError!usize, total_pages);
- defer gpa.free(results);
- {
- wg.reset();
- defer wg.wait();
-
- var i: usize = 0;
- while (i < total_pages) : (i += 1) {
- const fstart = i * self.page_size;
- const fsize = if (fstart + self.page_size > opts.file_size)
- opts.file_size - fstart
- else
- self.page_size;
- const out_hash = &self.code_directory.code_slots.items[i];
- wg.start();
- try comp.thread_pool.spawn(workerSha256Hash, .{
- opts.file, fstart, buffer[fstart..][0..fsize], out_hash, &results[i], &wg,
- });
- }
- }
- for (results) |result| _ = try result;
- }
+ var hasher = Hasher(Sha256){};
+ try hasher.hash(gpa, comp.thread_pool, opts.file, self.code_directory.code_slots.items, .{
+ .chunk_size = self.page_size,
+ .max_file_size = opts.file_size,
+ });
try blobs.append(.{ .code_directory = &self.code_directory });
header.length += @sizeOf(macho.BlobIndex);
@@ -352,7 +331,7 @@ pub fn writeAdhocSignature(
}
self.code_directory.inner.hashOffset =
- @sizeOf(macho.CodeDirectory) + @intCast(u32, self.code_directory.ident.len + 1) + self.code_directory.inner.nSpecialSlots * hash_size;
+ @sizeOf(macho.CodeDirectory) + @intCast(u32, self.code_directory.ident.len + 1 + self.code_directory.inner.nSpecialSlots * hash_size);
self.code_directory.inner.length = self.code_directory.size();
header.length += self.code_directory.size();
@@ -372,19 +351,6 @@ pub fn writeAdhocSignature(
}
}
-fn workerSha256Hash(
- file: fs.File,
- fstart: usize,
- buffer: []u8,
- hash: *[hash_size]u8,
- err: *fs.File.PReadError!usize,
- wg: *WaitGroup,
-) void {
- defer wg.finish();
- err.* = file.preadAll(buffer, fstart);
- Sha256.hash(buffer, hash, .{});
-}
-
pub fn size(self: CodeSignature) u32 {
var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size();
if (self.requirements) |req| {
src/link/MachO/DebugSymbols.zig
@@ -5,6 +5,7 @@ const build_options = @import("build_options");
const assert = std.debug.assert;
const fs = std.fs;
const link = @import("../../link.zig");
+const load_commands = @import("load_commands.zig");
const log = std.log.scoped(.dsym);
const macho = std.macho;
const makeStaticString = MachO.makeStaticString;
@@ -303,10 +304,7 @@ pub fn flushModule(self: *DebugSymbols, macho_file: *MachO) !void {
self.finalizeDwarfSegment(macho_file);
try self.writeLinkeditSegmentData(macho_file, &ncmds, lc_writer);
- {
- try lc_writer.writeStruct(macho_file.uuid);
- ncmds += 1;
- }
+ try load_commands.writeUuidLC(&macho_file.uuid.buf, &ncmds, lc_writer);
var headers_buf = std.ArrayList(u8).init(self.allocator);
defer headers_buf.deinit();
src/link/MachO/hasher.zig
@@ -0,0 +1,60 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const fs = std.fs;
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const ThreadPool = @import("../../ThreadPool.zig");
+const WaitGroup = @import("../../WaitGroup.zig");
+
+/// Builds a chunk-wise file hasher around `Hasher`, which must provide `digest_length`
+/// and a one-shot `hash(input, out, options)` function.
+pub fn ParallelHasher(comptime Hasher: type) type {
+    const hash_size = Hasher.digest_length;
+
+    return struct {
+        /// Splits the first `opts.max_file_size` bytes of `file` (or the whole file when
+        /// that is null) into pieces of `opts.chunk_size` bytes, hashes every piece in a
+        /// job spawned on `pool`, and stores the digest of piece `i` in `out[i]`.
+        /// The final piece may be shorter than `opts.chunk_size`.
+        /// `out` must hold at least one entry per piece (asserted).
+        /// Returns an allocation or spawn error, or the first read error in chunk order.
+        pub fn hash(self: @This(), gpa: Allocator, pool: *ThreadPool, file: fs.File, out: [][hash_size]u8, opts: struct {
+            chunk_size: u16 = 0x4000,
+            max_file_size: ?u64 = null,
+        }) !void {
+            _ = self;
+
+            var wg: WaitGroup = .{};
+
+            const file_size = opts.max_file_size orelse try file.getEndPos();
+            const total_num_chunks = mem.alignForward(file_size, opts.chunk_size) / opts.chunk_size;
+            assert(out.len >= total_num_chunks);
+
+            // One contiguous scratch buffer; every job receives its own disjoint slice of it.
+            const buffer = try gpa.alloc(u8, opts.chunk_size * total_num_chunks);
+            defer gpa.free(buffer);
+
+            // One read result per job, inspected once all jobs are done.
+            const results = try gpa.alloc(fs.File.PReadError!usize, total_num_chunks);
+            defer gpa.free(results);
+
+            {
+                wg.reset();
+                // Jobs borrow `buffer`, `results` and `out`, so wait for all of them
+                // before this scope is left, on success and on error alike.
+                defer wg.wait();
+
+                var i: usize = 0;
+                while (i < total_num_chunks) : (i += 1) {
+                    const fstart = i * opts.chunk_size;
+                    const fsize = if (fstart + opts.chunk_size > file_size) file_size - fstart else opts.chunk_size;
+                    wg.start();
+                    pool.spawn(worker, .{ file, fstart, buffer[fstart..][0..fsize], &out[i], &results[i], &wg }) catch |err| {
+                        // Assuming a failed spawn never runs the job, nobody would call
+                        // `finish` for it; balance the `start` above, otherwise the
+                        // deferred `wait` would block forever.
+                        wg.finish();
+                        return err;
+                    };
+                }
+            }
+            // Only errors are surfaced here; the returned byte counts are discarded.
+            for (results) |result| _ = try result;
+        }
+
+        /// Job body: reads `buffer.len` bytes of `file` starting at offset `fstart`,
+        /// records the read result in `err`, and writes the digest of `buffer` to `out`.
+        /// Signals `wg` when done.
+        fn worker(
+            file: fs.File,
+            fstart: usize,
+            buffer: []u8,
+            out: *[hash_size]u8,
+            err: *fs.File.PReadError!usize,
+            wg: *WaitGroup,
+        ) void {
+            defer wg.finish();
+            err.* = file.preadAll(buffer, fstart);
+            // Hashing happens even when the read failed; the caller checks `err` afterwards.
+            Hasher.hash(buffer, out, .{});
+        }
+    };
+}
src/link/MachO/uuid.zig
@@ -0,0 +1,69 @@
+const std = @import("std");
+const fs = std.fs;
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const Compilation = @import("../../Compilation.zig");
+const Md5 = std.crypto.hash.Md5;
+const Hasher = @import("hasher.zig").ParallelHasher;
+
+/// Somewhat random chunk size for MD5 hash calculation.
+pub const chunk_size = 0x4000;
+
+/// Calculates Md5 hash of the first `file_size` bytes of `file` and stores the
+/// RFC 4122-conformed digest in `out`.
+/// Hash is calculated in a streaming manner which may be slow.
+/// Returns `error.InputOutput` when fewer bytes than expected could be read.
+pub fn calcUuidStreaming(file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
+    const total_num_chunks = mem.alignForward(file_size, chunk_size) / chunk_size;
+
+    var hasher = Md5.init(.{});
+    var buffer: [chunk_size]u8 = undefined;
+
+    var i: usize = 0;
+    while (i < total_num_chunks) : (i += 1) {
+        const start = i * chunk_size;
+        const size = if (start + chunk_size > file_size)
+            file_size - start
+        else
+            chunk_size;
+        // Read exactly the bytes that belong to this chunk. Reading the full buffer
+        // would return more than `size` bytes on the last chunk whenever the file
+        // extends past `file_size`, and the check below would then fail spuriously.
+        const chunk = buffer[0..size];
+        const amt = try file.preadAll(chunk, start);
+        if (amt != size) return error.InputOutput;
+
+        hasher.update(chunk);
+    }
+
+    hasher.final(out);
+    conform(out);
+}
+
+/// Calculates Md5 hash of each chunk in parallel and then hashes all Md5 hashes to produce
+/// the final digest.
+/// While this is NOT a correct MD5 hash of the contents, this methodology is used by LLVM/LLD
+/// and we will use it too as it seems accepted by Apple OSes.
+/// Only the first `file_size` bytes of `file` are considered; the conformed digest is
+/// written to `out`. Temporary memory comes from `comp.gpa`, jobs run on `comp.thread_pool`.
+pub fn calcUuidParallel(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
+    const total_hashes = mem.alignForward(file_size, chunk_size) / chunk_size;
+
+    const hashes = try comp.gpa.alloc([Md5.digest_length]u8, total_hashes);
+    defer comp.gpa.free(hashes);
+
+    var hasher = Hasher(Md5){};
+    try hasher.hash(comp.gpa, comp.thread_pool, file, hashes, .{
+        .chunk_size = chunk_size,
+        .max_file_size = file_size,
+    });
+
+    // The per-chunk digests already sit back to back in `hashes`, so hash that memory
+    // directly instead of copying it into a second buffer first.
+    Md5.hash(mem.sliceAsBytes(hashes), out, .{});
+    conform(out);
+}
+
+/// Rewrites the version and variant bits of `out` in place so the digest has the
+/// layout of an RFC 4122 UUID, as expected for LC_UUID.
+inline fn conform(out: *[Md5.digest_length]u8) void {
+    // High nibble of byte 6 is the UUID version field; the value written is 3.
+    const version_bits: u8 = 3 << 4;
+    // Top two bits of byte 8 are the variant field; they are set to binary 10.
+    const variant_bits: u8 = 0x80;
+
+    const low_nibble = out[6] & 0x0F;
+    const low_six_bits = out[8] & 0x3F;
+    out[6] = low_nibble | version_bits;
+    out[8] = low_six_bits | variant_bits;
+}
src/link/MachO/zld.zig
@@ -4037,8 +4037,15 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr
const uuid_offset_backpatch: ?usize = blk: {
const index = lc_buffer.items.len;
var uuid_buf: [16]u8 = [_]u8{0} ** 16;
+
+ if (zld.options.optimize_mode == .Debug) {
+ // In Debug we don't really care about reproducibility, so put in a random value
+ // and be done with it.
+ std.crypto.random.bytes(&uuid_buf);
+ }
+
try load_commands.writeUuidLC(&uuid_buf, &ncmds, lc_writer);
- break :blk index;
+ break :blk if (zld.options.optimize_mode == .Debug) null else index;
};
try load_commands.writeLoadDylibLCs(zld.dylibs.items, zld.referenced_dylibs.keys(), &ncmds, lc_writer);
@@ -4076,7 +4083,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr
const seg = zld.getLinkeditSegmentPtr();
const file_size = seg.fileoff + seg.filesize;
var uuid_buf: [16]u8 = undefined;
- try uuid.calcMd5Hash(zld.gpa, zld.file, file_size, &uuid_buf);
+ try uuid.calcUuidParallel(comp, zld.file, file_size, &uuid_buf);
const offset = @sizeOf(macho.mach_header_64) + headers_buf.items.len + backpatch + @sizeOf(macho.load_command);
try zld.file.pwriteAll(&uuid_buf, offset);
}
src/link/MachO.zig
@@ -99,10 +99,10 @@ page_size: u16,
/// fashion (default for LLVM backend).
mode: enum { incremental, one_shot },
-uuid: macho.uuid_command = .{
- .cmdsize = @sizeOf(macho.uuid_command),
- .uuid = undefined,
-},
+uuid: struct {
+ buf: [16]u8 = undefined,
+ final: bool = false,
+} = .{},
dylibs: std.ArrayListUnmanaged(Dylib) = .{},
dylibs_map: std.StringHashMapUnmanaged(u16) = .{},
@@ -588,11 +588,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No
try load_commands.writeBuildVersionLC(&self.base.options, &ncmds, lc_writer);
- {
- std.crypto.random.bytes(&self.uuid.uuid);
- try lc_writer.writeStruct(self.uuid);
- ncmds += 1;
+ if (!self.uuid.final) {
+ std.crypto.random.bytes(&self.uuid.buf);
+ self.uuid.final = true;
}
+ try load_commands.writeUuidLC(&self.uuid.buf, &ncmds, lc_writer);
try load_commands.writeLoadDylibLCs(self.dylibs.items, self.referenced_dylibs.keys(), &ncmds, lc_writer);
CMakeLists.txt
@@ -591,6 +591,7 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/fat.zig"
+ "${CMAKE_SOURCE_DIR}/src/link/MachO/hasher.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/uuid.zig"