Commit df5085bde0

Motiejus Jakštys <motiejus@uber.com>
2023-04-25 15:57:43
stage2: implement --build-id styles
1 parent c696648
Changed files (7)
lib/std/Build/Step/Compile.zig
@@ -116,7 +116,7 @@ each_lib_rpath: ?bool = null,
 /// As an example, the bloaty project refuses to work unless its inputs have
 /// build ids, in order to prevent accidental mismatches.
 /// The default is to not include this section because it slows down linking.
-build_id: ?bool = null,
+build_id: ?BuildId = null,
 
 /// Create a .eh_frame_hdr section and a PT_GNU_EH_FRAME segment in the ELF
 /// file.
@@ -288,6 +288,68 @@ pub const Options = struct {
     use_lld: ?bool = null,
 };
 
+/// Style of build ID note to request from the linker (`--build-id[=style]`).
+pub const BuildId = union(enum) {
+    none,
+    fast,
+    uuid,
+    sha1,
+    md5,
+    /// Hex digits only, without the "0x" prefix or any separators.
+    hexstring: []const u8,
+
+    /// Feeds this build ID into `hasher`, which must provide `update([]const u8)`.
+    pub fn hash(self: BuildId, hasher: anytype) void {
+        switch (self) {
+            .none, .fast, .uuid, .sha1, .md5 => hasher.update(@tagName(self)),
+            .hexstring => |str| {
+                hasher.update("0x");
+                hasher.update(str);
+            },
+        }
+    }
+
+    /// Parses a build ID style name. "tree" is accepted as an alias for sha1.
+    /// A "0x" prefix selects `.hexstring`; '-' and ':' separators are dropped.
+    /// The returned hexstring is allocated with `allocator` at exactly its
+    /// own length, so the caller can free the slice directly.
+    pub fn parse(allocator: std.mem.Allocator, text: []const u8) error{
+        InvalidHexInt,
+        InvalidBuildId,
+        OutOfMemory,
+    }!BuildId {
+        if (mem.eql(u8, text, "none")) return .none;
+        if (mem.eql(u8, text, "fast")) return .fast;
+        if (mem.eql(u8, text, "uuid")) return .uuid;
+        if (mem.eql(u8, text, "sha1") or mem.eql(u8, text, "tree")) return .sha1;
+        if (mem.eql(u8, text, "md5")) return .md5;
+        if (!mem.startsWith(u8, text, "0x")) return error.InvalidBuildId;
+
+        const digits = text["0x".len..];
+        // Validate and count first so the buffer is allocated at its final
+        // size; a best-effort in-place shrink may be refused, which would
+        // leave the returned slice shorter than the allocation backing it.
+        var digit_count: usize = 0;
+        for (digits) |c| {
+            if (std.ascii.isHex(c)) {
+                digit_count += 1;
+            } else if (c != '-' and c != ':') {
+                return error.InvalidHexInt;
+            }
+        }
+
+        const clean_hex_string = try allocator.alloc(u8, digit_count);
+        var i: usize = 0;
+        for (digits) |c| {
+            if (std.ascii.isHex(c)) {
+                clean_hex_string[i] = c;
+                i += 1;
+            }
+        }
+        return BuildId{ .hexstring = clean_hex_string };
+    }
+};
+
 pub const Kind = enum {
     exe,
     lib,
@@ -1810,7 +1872,13 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void {
 
     try addFlag(&zig_args, "valgrind", self.valgrind_support);
     try addFlag(&zig_args, "each-lib-rpath", self.each_lib_rpath);
-    try addFlag(&zig_args, "build-id", self.build_id);
+    if (self.build_id) |build_id| {
+        const fmt_str = "--build-id={s}{s}";
+        try zig_args.append(switch (build_id) {
+            .hexstring => |str| try std.fmt.allocPrint(b.allocator, fmt_str, .{ "0x", str }),
+            .none, .fast, .uuid, .sha1, .md5 => try std.fmt.allocPrint(b.allocator, fmt_str, .{ "", @tagName(build_id) }),
+        });
+    }
 
     if (self.zig_lib_dir) |dir| {
         try zig_args.append("--zig-lib-dir");
@@ -2175,3 +2243,50 @@ fn checkCompileErrors(self: *Compile) !void {
         \\=========================================
     , .{ expected_generated.items, actual_stderr });
 }
+
+const testing = std.testing;
+
+test "BuildId.parse" {
+    // Each case is: input text, expected value on success, expected error.
+    const tests = &[_]struct {
+        []const u8,
+        ?BuildId,
+        ?anyerror,
+    }{
+        .{ "0x", BuildId{ .hexstring = "" }, null },
+        .{ "0x12-34:", BuildId{ .hexstring = "1234" }, null },
+        .{ "0x123456", BuildId{ .hexstring = "123456" }, null },
+        .{ "md5", .md5, null },
+        .{ "none", .none, null },
+        .{ "fast", .fast, null },
+        .{ "uuid", .uuid, null },
+        .{ "sha1", .sha1, null },
+        .{ "tree", .sha1, null },
+        .{ "0xfoobbb", null, error.InvalidHexInt },
+        .{ "yaddaxxx", null, error.InvalidBuildId },
+    };
+
+    for (tests) |tt| {
+        const input = tt[0];
+        // Report which table entry failed before the error propagates.
+        errdefer std.log.err("BuildId.parse failed on {s}", .{input});
+
+        if (tt[2]) |expected_err| {
+            try testing.expectError(expected_err, BuildId.parse(testing.allocator, input));
+            continue;
+        }
+
+        const expected = tt[1].?;
+        const actual = try BuildId.parse(testing.allocator, input);
+        // Release the parsed string even when an expectation below fails,
+        // so a leak report does not pile on top of the real failure.
+        defer if (actual == .hexstring) testing.allocator.free(actual.hexstring);
+        switch (expected) {
+            .hexstring => |expected_str| {
+                try testing.expect(actual == .hexstring);
+                try testing.expectEqualStrings(expected_str, actual.hexstring);
+            },
+            else => try testing.expectEqual(expected, actual),
+        }
+    }
+}
src/link/Elf.zig
@@ -1399,7 +1399,8 @@ fn linkWithLLD(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node) !v
         man.hash.add(self.base.options.each_lib_rpath);
         if (self.base.options.output_mode == .Exe) {
             man.hash.add(stack_size);
-            man.hash.add(self.base.options.build_id);
+            if (self.base.options.build_id) |build_id|
+                build_id.hash(&man.hash.hasher);
         }
         man.hash.addListOfBytes(self.base.options.symbol_wrap_set.keys());
         man.hash.add(self.base.options.skip_linker_dependencies);
@@ -1542,8 +1543,12 @@ fn linkWithLLD(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node) !v
             try argv.append("-z");
             try argv.append(try std.fmt.allocPrint(arena, "stack-size={d}", .{stack_size}));
 
-            if (self.base.options.build_id) {
-                try argv.append("--build-id");
+            if (self.base.options.build_id) |build_id| {
+                const fmt_str = "--build-id={s}{s}";
+                try argv.append(switch (build_id) {
+                    .hexstring => |str| try std.fmt.allocPrint(arena, fmt_str, .{ "0x", str }),
+                    .none, .fast, .uuid, .sha1, .md5 => try std.fmt.allocPrint(arena, fmt_str, .{ "", @tagName(build_id) }),
+                });
             }
         }
 
src/link/Wasm.zig
@@ -3163,7 +3163,8 @@ fn linkWithZld(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) l
         try man.addOptionalFile(compiler_rt_path);
         man.hash.addOptionalBytes(options.entry);
         man.hash.addOptional(options.stack_size_override);
-        man.hash.add(wasm.base.options.build_id);
+        if (wasm.base.options.build_id) |build_id|
+            build_id.hash(&man.hash.hasher);
         man.hash.add(options.import_memory);
         man.hash.add(options.import_table);
         man.hash.add(options.export_table);
@@ -3797,8 +3798,27 @@ fn writeToFile(
     if (!wasm.base.options.strip) {
         // The build id must be computed on the main sections only,
         // so we have to do it now, before the debug sections.
-        if (wasm.base.options.build_id) {
-            try emitBuildIdSection(&binary_bytes);
+        if (wasm.base.options.build_id) |build_id| {
+            switch (build_id) {
+                .none => {},
+                .fast => {
+                    var id: [16]u8 = undefined;
+                    std.crypto.hash.sha3.TurboShake128(null).hash(binary_bytes.items, &id, .{});
+                    var uuid: [36]u8 = undefined;
+                    _ = try std.fmt.bufPrint(&uuid, "{s}-{s}-{s}-{s}-{s}", .{
+                        std.fmt.fmtSliceHexLower(id[0..4]),
+                        std.fmt.fmtSliceHexLower(id[4..6]),
+                        std.fmt.fmtSliceHexLower(id[6..8]),
+                        std.fmt.fmtSliceHexLower(id[8..10]),
+                        std.fmt.fmtSliceHexLower(id[10..]),
+                    });
+                    try emitBuildIdSection(&binary_bytes, &uuid);
+                },
+                .hexstring => |str| {
+                    try emitBuildIdSection(&binary_bytes, str);
+                },
+                else => |mode| log.err("build-id '{s}' is not supported for WASM", .{@tagName(mode)}),
+            }
         }
 
         // if (wasm.dwarf) |*dwarf| {
@@ -3942,25 +3962,17 @@ fn emitProducerSection(binary_bytes: *std.ArrayList(u8)) !void {
     );
 }
 
-fn emitBuildIdSection(binary_bytes: *std.ArrayList(u8)) !void {
+fn emitBuildIdSection(binary_bytes: *std.ArrayList(u8), build_id: []const u8) !void {
     const header_offset = try reserveCustomSectionHeader(binary_bytes);
 
     const writer = binary_bytes.writer();
-    const build_id = "build_id";
-    try leb.writeULEB128(writer, @intCast(u32, build_id.len));
-    try writer.writeAll(build_id);
-
-    var id: [16]u8 = undefined;
-    std.crypto.hash.sha3.TurboShake128(null).hash(binary_bytes.items, &id, .{});
-    var uuid: [36]u8 = undefined;
-    _ = try std.fmt.bufPrint(&uuid, "{s}-{s}-{s}-{s}-{s}", .{
-        std.fmt.fmtSliceHexLower(id[0..4]),  std.fmt.fmtSliceHexLower(id[4..6]), std.fmt.fmtSliceHexLower(id[6..8]),
-        std.fmt.fmtSliceHexLower(id[8..10]), std.fmt.fmtSliceHexLower(id[10..]),
-    });
+    const hdr_build_id = "build_id";
+    try leb.writeULEB128(writer, @intCast(u32, hdr_build_id.len));
+    try writer.writeAll(hdr_build_id);
 
     try leb.writeULEB128(writer, @as(u32, 1));
-    try leb.writeULEB128(writer, @as(u32, uuid.len));
-    try writer.writeAll(&uuid);
+    try leb.writeULEB128(writer, @intCast(u32, build_id.len));
+    try writer.writeAll(build_id);
 
     try writeCustomSectionHeader(
         binary_bytes.items,
@@ -4199,7 +4211,8 @@ fn linkWithLLD(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) !
         try man.addOptionalFile(compiler_rt_path);
         man.hash.addOptionalBytes(wasm.base.options.entry);
         man.hash.addOptional(wasm.base.options.stack_size_override);
-        man.hash.add(wasm.base.options.build_id);
+        if (wasm.base.options.build_id) |build_id|
+            build_id.hash(&man.hash.hasher);
         man.hash.add(wasm.base.options.import_memory);
         man.hash.add(wasm.base.options.import_table);
         man.hash.add(wasm.base.options.export_table);
src/Compilation.zig
@@ -29,6 +29,7 @@ const wasi_libc = @import("wasi_libc.zig");
 const fatal = @import("main.zig").fatal;
 const clangMain = @import("main.zig").clangMain;
 const Module = @import("Module.zig");
+const BuildId = std.Build.CompileStep.BuildId;
 const Cache = std.Build.Cache;
 const translate_c = @import("translate_c.zig");
 const clang = @import("clang.zig");
@@ -563,7 +564,7 @@ pub const InitOptions = struct {
     linker_print_map: bool = false,
     linker_opt_bisect_limit: i32 = -1,
     each_lib_rpath: ?bool = null,
-    build_id: ?bool = null,
+    build_id: ?BuildId = null,
     disable_c_depfile: bool = false,
     linker_z_nodelete: bool = false,
     linker_z_notext: bool = false,
@@ -797,7 +798,6 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation {
         const unwind_tables = options.want_unwind_tables orelse
             (link_libunwind or target_util.needUnwindTables(options.target));
         const link_eh_frame_hdr = options.link_eh_frame_hdr or unwind_tables;
-        const build_id = options.build_id orelse false;
 
         // Make a decision on whether to use LLD or our own linker.
         const use_lld = options.use_lld orelse blk: {
@@ -828,7 +828,7 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation {
                 options.output_mode == .Lib or
                 options.linker_script != null or options.version_script != null or
                 options.emit_implib != null or
-                build_id or
+                options.build_id != null or
                 options.symbol_wrap_set.count() > 0)
             {
                 break :blk true;
@@ -1514,7 +1514,7 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation {
             .skip_linker_dependencies = options.skip_linker_dependencies,
             .parent_compilation_link_libc = options.parent_compilation_link_libc,
             .each_lib_rpath = options.each_lib_rpath orelse options.is_native_os,
-            .build_id = build_id,
+            .build_id = options.build_id,
             .cache_mode = cache_mode,
             .disable_lld_caching = options.disable_lld_caching or cache_mode == .whole,
             .subsystem = options.subsystem,
@@ -2269,7 +2269,9 @@ fn addNonIncrementalStuffToCacheManifest(comp: *Compilation, man: *Cache.Manifes
     man.hash.addListOfBytes(comp.bin_file.options.rpath_list);
     man.hash.addListOfBytes(comp.bin_file.options.symbol_wrap_set.keys());
     man.hash.add(comp.bin_file.options.each_lib_rpath);
-    man.hash.add(comp.bin_file.options.build_id);
+    if (comp.bin_file.options.build_id) |build_id| {
+        build_id.hash(&man.hash.hasher);
+    }
     man.hash.add(comp.bin_file.options.skip_linker_dependencies);
     man.hash.add(comp.bin_file.options.z_nodelete);
     man.hash.add(comp.bin_file.options.z_notext);
src/link.zig
@@ -10,6 +10,7 @@ const wasi_libc = @import("wasi_libc.zig");
 
 const Air = @import("Air.zig");
 const Allocator = std.mem.Allocator;
+const BuildId = std.Build.CompileStep.BuildId;
 const Cache = std.Build.Cache;
 const Compilation = @import("Compilation.zig");
 const LibCInstallation = @import("libc_installation.zig").LibCInstallation;
@@ -157,7 +158,7 @@ pub const Options = struct {
     skip_linker_dependencies: bool,
     parent_compilation_link_libc: bool,
     each_lib_rpath: bool,
-    build_id: bool,
+    build_id: ?BuildId,
     disable_lld_caching: bool,
     is_test: bool,
     hash_style: HashStyle,
src/main.zig
@@ -22,6 +22,7 @@ const LibCInstallation = @import("libc_installation.zig").LibCInstallation;
 const wasi_libc = @import("wasi_libc.zig");
 const translate_c = @import("translate_c.zig");
 const clang = @import("clang.zig");
+const BuildId = std.Build.CompileStep.BuildId;
 const Cache = std.Build.Cache;
 const target_util = @import("target.zig");
 const crash_report = @import("crash_report.zig");
@@ -493,8 +494,7 @@ const usage_build_generic =
     \\  -fno-each-lib-rpath            Prevent adding rpath for each used dynamic library
     \\  -fallow-shlib-undefined        Allows undefined symbols in shared libraries
     \\  -fno-allow-shlib-undefined     Disallows undefined symbols in shared libraries
-    \\  -fbuild-id                     Helps coordinate stripped binaries with debug symbols
-    \\  -fno-build-id                  (default) Saves a bit of time linking
+    \\  --build-id[=style]             Generate a build ID note
     \\  --eh-frame-hdr                 Enable C++ exception handling by passing --eh-frame-hdr to linker
     \\  --emit-relocs                  Enable output of relocation sections for post build tools
     \\  -z [arg]                       Set linker extension flags
@@ -817,7 +817,7 @@ fn buildOutputType(
     var link_eh_frame_hdr = false;
     var link_emit_relocs = false;
     var each_lib_rpath: ?bool = null;
-    var build_id: ?bool = null;
+    var build_id: ?BuildId = null;
     var sysroot: ?[]const u8 = null;
     var libc_paths_file: ?[]const u8 = try optionalStringEnvVar(arena, "ZIG_LIBC");
     var machine_code_model: std.builtin.CodeModel = .default;
@@ -1202,10 +1202,6 @@ fn buildOutputType(
                         each_lib_rpath = true;
                     } else if (mem.eql(u8, arg, "-fno-each-lib-rpath")) {
                         each_lib_rpath = false;
-                    } else if (mem.eql(u8, arg, "-fbuild-id")) {
-                        build_id = true;
-                    } else if (mem.eql(u8, arg, "-fno-build-id")) {
-                        build_id = false;
                     } else if (mem.eql(u8, arg, "--test-cmd-bin")) {
                         try test_exec_args.append(null);
                     } else if (mem.eql(u8, arg, "--test-evented-io")) {
@@ -1446,6 +1442,15 @@ fn buildOutputType(
                         linker_gc_sections = true;
                     } else if (mem.eql(u8, arg, "--no-gc-sections")) {
                         linker_gc_sections = false;
+                    } else if (mem.eql(u8, arg, "--build-id")) {
+                        build_id = .fast;
+                    } else if (mem.startsWith(u8, arg, "--build-id=")) {
+                        const value = arg["--build-id=".len..];
+                        build_id = BuildId.parse(arena, value) catch |err| switch (err) {
+                            error.InvalidHexInt => fatal("failed to parse hex value {s}", .{value}),
+                            error.InvalidBuildId => fatal("invalid --build-id={s}", .{value}),
+                            error.OutOfMemory => fatal("OOM", .{}),
+                        };
                     } else if (mem.eql(u8, arg, "--debug-compile-errors")) {
                         if (!crash_report.is_enabled) {
                             std.log.warn("Zig was compiled in a release mode. --debug-compile-errors has no effect.", .{});
@@ -1684,11 +1689,7 @@ fn buildOutputType(
                                 if (mem.indexOfScalar(u8, linker_arg, '=')) |equals_pos| {
                                     const key = linker_arg[0..equals_pos];
                                     const value = linker_arg[equals_pos + 1 ..];
-                                    if (mem.eql(u8, key, "build-id")) {
-                                        build_id = true;
-                                        warn("ignoring build-id style argument: '{s}'", .{value});
-                                        continue;
-                                    } else if (mem.eql(u8, key, "--sort-common")) {
+                                    if (mem.eql(u8, key, "--sort-common")) {
                                         // this ignores --sort=common=<anything>; ignoring plain --sort-common
                                         // is done below.
                                         continue;
@@ -1730,6 +1731,15 @@ fn buildOutputType(
                                 search_strategy = .paths_first;
                             } else if (mem.eql(u8, linker_arg, "-search_dylibs_first")) {
                                 search_strategy = .dylibs_first;
+                            } else if (mem.eql(u8, linker_arg, "--build-id")) {
+                                build_id = .fast;
+                            } else if (mem.startsWith(u8, linker_arg, "--build-id=")) {
+                                const value = linker_arg["--build-id=".len..];
+                                build_id = BuildId.parse(arena, value) catch |err| switch (err) {
+                                    error.InvalidHexInt => fatal("failed to parse hex value {s}", .{value}),
+                                    error.InvalidBuildId => fatal("invalid --build-id={s}", .{value}),
+                                    error.OutOfMemory => fatal("OOM", .{}),
+                                };
                             } else {
                                 try linker_args.append(linker_arg);
                             }
build.zig
@@ -165,8 +165,11 @@ pub fn build(b: *std.Build) !void {
     exe.strip = strip;
     exe.pie = pie;
     exe.sanitize_thread = sanitize_thread;
-    exe.build_id = b.option(bool, "build-id", "Include a build id note") orelse false;
     exe.entitlements = entitlements;
+
+    if (b.option([]const u8, "build-id", "Include a build id note")) |build_id|
+        exe.build_id = try std.Build.CompileStep.BuildId.parse(b.allocator, build_id);
+
     b.installArtifact(exe);
 
     test_step.dependOn(&exe.step);