Commit 585c21e54d

Jakub Konka <kubkon@jakubkonka.com>
2022-12-15 23:43:50
macho: move parallel file hashing back to CodeSignature
I need to think some more about how to calculate the UUID in parallel, if that is even possible, while preserving the UUID's determinism.
1 parent 1928ed7
Changed files (4)
src/link/MachO/CodeSignature.zig
@@ -10,8 +10,9 @@ const testing = std.testing;
 
 const Allocator = mem.Allocator;
 const Compilation = @import("../../Compilation.zig");
-const Hasher = @import("hasher.zig").ParallelHasher;
 const Sha256 = std.crypto.hash.sha2.Sha256;
+const ThreadPool = @import("../../ThreadPool.zig");
+const WaitGroup = @import("../../WaitGroup.zig");
 
 const hash_size = Sha256.digest_length;
 
@@ -288,11 +289,7 @@ pub fn writeAdhocSignature(
     self.code_directory.inner.nCodeSlots = total_pages;
 
     // Calculate hash for each page (in file) and write it to the buffer
-    var hasher = Hasher(Sha256){};
-    try hasher.hash(gpa, comp.thread_pool, opts.file, self.code_directory.code_slots.items, .{
-        .chunk_size = self.page_size,
-        .max_file_size = opts.file_size,
-    });
+    try self.parallelHash(gpa, comp.thread_pool, opts.file, opts.file_size);
 
     try blobs.append(.{ .code_directory = &self.code_directory });
     header.length += @sizeOf(macho.BlobIndex);
@@ -351,6 +348,62 @@ pub fn writeAdhocSignature(
     }
 }
 
+/// Hashes the first `file_size` bytes of `file` in chunks of `self.page_size` bytes and
+/// stores the SHA-256 digest of chunk `i` in `self.code_directory.code_slots.items[i]`.
+///
+/// One task per chunk is spawned on `pool`; this function returns only after every task
+/// that was successfully spawned has finished. `code_slots` must already hold at least
+/// one slot per chunk (asserted).
+///
+/// Returns an allocation error, a spawn error, or the read error of the lowest-numbered
+/// chunk whose read failed.
+fn parallelHash(
+    self: *CodeSignature,
+    gpa: Allocator,
+    pool: *ThreadPool,
+    file: fs.File,
+    file_size: u64,
+) !void {
+    var wg: WaitGroup = .{};
+
+    const total_num_chunks = mem.alignForward(file_size, self.page_size) / self.page_size;
+    assert(self.code_directory.code_slots.items.len >= total_num_chunks);
+
+    // Scratch space for the file contents; every task reads into its own disjoint slice.
+    const buffer = try gpa.alloc(u8, self.page_size * total_num_chunks);
+    defer gpa.free(buffer);
+
+    // One read result per chunk, inspected once all tasks are done.
+    const results = try gpa.alloc(fs.File.PReadError!usize, total_num_chunks);
+    defer gpa.free(results);
+
+    {
+        wg.reset();
+        // Tasks hold pointers into `buffer` and `results`, so they must all be finished
+        // before the deferred frees above run, on the success and the error path alike.
+        defer wg.wait();
+
+        var i: usize = 0;
+        while (i < total_num_chunks) : (i += 1) {
+            const fstart = i * self.page_size;
+            // The last chunk may be shorter than a full page.
+            const fsize = if (fstart + self.page_size > file_size)
+                file_size - fstart
+            else
+                self.page_size;
+            wg.start();
+            pool.spawn(worker, .{
+                file,
+                fstart,
+                buffer[fstart..][0..fsize],
+                &self.code_directory.code_slots.items[i],
+                &results[i],
+                &wg,
+            }) catch |err| {
+                // The task was never queued, so nothing will call `wg.finish()` for it.
+                // Balance the `wg.start()` above; otherwise the deferred `wg.wait()`
+                // would keep waiting for a task that does not exist.
+                wg.finish();
+                return err;
+            };
+        }
+    }
+    for (results) |result| _ = try result;
+}
+
+/// Thread-pool task for a single chunk: reads up to `buffer.len` bytes of `file` starting
+/// at offset `fstart` into `buffer` and stores the read result in `err`. When the read
+/// succeeded, writes the SHA-256 digest of `buffer` to `out`; when it failed, `out` is
+/// left untouched. Always calls `wg.finish()` on exit.
+fn worker(
+    file: fs.File,
+    fstart: usize,
+    buffer: []u8,
+    out: *[hash_size]u8,
+    err: *fs.File.PReadError!usize,
+    wg: *WaitGroup,
+) void {
+    defer wg.finish();
+    const read_result = file.preadAll(buffer, fstart);
+    err.* = read_result;
+    // A failed read leaves `buffer` without valid file contents; skip the digest and let
+    // the caller surface the stored error instead.
+    _ = read_result catch return;
+    Sha256.hash(buffer, out, .{});
+}
+
 pub fn size(self: CodeSignature) u32 {
     var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size();
     if (self.requirements) |req| {
src/link/MachO/hasher.zig
@@ -1,68 +0,0 @@
-const std = @import("std");
-const assert = std.debug.assert;
-const fs = std.fs;
-const mem = std.mem;
-
-const Allocator = mem.Allocator;
-const ThreadPool = @import("../../ThreadPool.zig");
-const WaitGroup = @import("../../WaitGroup.zig");
-
-pub fn ParallelHasher(comptime Hasher: type) type {
-    const hash_size = Hasher.digest_length;
-
-    return struct {
-        pub fn hash(self: @This(), gpa: Allocator, pool: *ThreadPool, file: fs.File, out: [][hash_size]u8, opts: struct {
-            chunk_size: u16 = 0x4000,
-            file_pos: u64 = 0,
-            max_file_size: ?u64 = null,
-        }) !void {
-            _ = self;
-
-            var wg: WaitGroup = .{};
-
-            const file_size = opts.max_file_size orelse try file.getEndPos();
-            const total_num_chunks = mem.alignForward(file_size, opts.chunk_size) / opts.chunk_size;
-            assert(out.len >= total_num_chunks);
-
-            const buffer = try gpa.alloc(u8, opts.chunk_size * total_num_chunks);
-            defer gpa.free(buffer);
-
-            const results = try gpa.alloc(fs.File.PReadError!usize, total_num_chunks);
-            defer gpa.free(results);
-
-            {
-                wg.reset();
-                defer wg.wait();
-
-                var i: usize = 0;
-                while (i < total_num_chunks) : (i += 1) {
-                    const fstart = i * opts.chunk_size;
-                    const fsize = if (fstart + opts.chunk_size > file_size) file_size - fstart else opts.chunk_size;
-                    wg.start();
-                    try pool.spawn(worker, .{
-                        file,
-                        fstart + opts.file_pos,
-                        buffer[fstart..][0..fsize],
-                        &out[i],
-                        &results[i],
-                        &wg,
-                    });
-                }
-            }
-            for (results) |result| _ = try result;
-        }
-
-        fn worker(
-            file: fs.File,
-            fstart: usize,
-            buffer: []u8,
-            out: *[hash_size]u8,
-            err: *fs.File.PReadError!usize,
-            wg: *WaitGroup,
-        ) void {
-            defer wg.finish();
-            err.* = file.preadAll(buffer, fstart);
-            Hasher.hash(buffer, out, .{});
-        }
-    };
-}
src/link/MachO/zld.zig
@@ -2692,7 +2692,12 @@ pub const Zld = struct {
                 conformUuid(&self.uuid_cmd.uuid);
             },
             else => {
-                const max_file_size = self.symtab_cmd.stroff + self.symtab_cmd.strsize;
+                const max_file_end = self.symtab_cmd.stroff + self.symtab_cmd.strsize;
+
+                // A span of the output file described by its start and end offsets.
+                // NOTE(review): `end` looks exclusive (it is set to `stroff + strsize`
+                // below) — confirm against the code that consumes these ranges.
+                const FileSubsection = struct {
+                    start: u32,
+                    end: u32,
+                };
 
                 var subsections: [5]FileSubsection = undefined;
                 var count: usize = 0;
@@ -2743,7 +2748,7 @@ pub const Zld = struct {
                                 @as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command))
                             else
                                 @sizeOf(macho.linkedit_data_command),
-                            .end = max_file_size,
+                            .end = max_file_end,
                         };
                         count += 1;
                     } else {
@@ -2773,7 +2778,7 @@ pub const Zld = struct {
                             @as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command))
                         else
                             @sizeOf(macho.linkedit_data_command),
-                        .end = max_file_size,
+                        .end = max_file_end,
                     };
                     count += 1;
                 }
@@ -2816,29 +2821,6 @@ pub const Zld = struct {
         out[8] = (out[8] & 0x3F) | 0x80;
     }
 
-    const FileSubsection = struct {
-        start: u32,
-        end: u32,
-    };
-
-    // fn calcUuidHashes(
-    //     self: *Zld,
-    //     comp: *const Compilation,
-    //     cut: FileSubsection,
-    //     hashes: *std.ArrayList([Md5.digest_length]u8),
-    // ) !void {
-    //     const chunk_size = 0x4000;
-    //     const total_hashes = mem.alignForward(cut.end - cut.start, chunk_size) / chunk_size;
-    //     try hashes.resize(hashes.items.len + total_hashes);
-
-    //     var hasher = Hasher(Md5){};
-    //     try hasher.hash(self.gpa, comp.thread_pool, self.file, hashes.items, .{
-    //         .chunk_size = chunk_size,
-    //         .file_pos = cut.start,
-    //         .max_file_size = cut.end - cut.start,
-    //     });
-    // }
-
     fn writeCodeSignaturePadding(self: *Zld, code_sig: *CodeSignature) !void {
         const seg = self.getLinkeditSegmentPtr();
         // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file
CMakeLists.txt
@@ -591,7 +591,6 @@ set(ZIG_STAGE2_SOURCES
     "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/fat.zig"
-    "${CMAKE_SOURCE_DIR}/src/link/MachO/hasher.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/zld.zig"