Commit 95720f007b

David Rubin <daviru007@icloud.com>
2024-12-26 06:10:02
move libubsan to `lib/` and integrate it into `-fubsan-rt`
1 parent babee5f
lib/std/std.zig
@@ -44,7 +44,6 @@ pub const Thread = @import("Thread.zig");
 pub const Treap = @import("treap.zig").Treap;
 pub const Tz = tz.Tz;
 pub const Uri = @import("Uri.zig");
-pub const ubsan = @import("ubsan.zig");
 
 pub const array_hash_map = @import("array_hash_map.zig");
 pub const atomic = @import("atomic.zig");
lib/std/ubsan.zig → lib/ubsan.zig
@@ -73,8 +73,8 @@ const Value = extern struct {
         const size = value.type_descriptor.getIntegerSize();
         const max_inline_size = @bitSizeOf(ValueHandle);
         if (size <= max_inline_size) {
-            const extra_bits: u6 = @intCast(max_inline_size - size);
-            const handle: i64 = @bitCast(@intFromPtr(value.handle));
+            const extra_bits: std.math.Log2Int(usize) = @intCast(max_inline_size - size);
+            const handle: isize = @bitCast(@intFromPtr(value.handle));
             return (handle << extra_bits) >> extra_bits;
         }
         return switch (size) {
@@ -137,7 +137,7 @@ fn overflowHandler(
 ) void {
     const S = struct {
         fn handler(
-            data: *OverflowData,
+            data: *const OverflowData,
             lhs_handle: ValueHandle,
             rhs_handle: ValueHandle,
         ) callconv(.c) noreturn {
src/link/MachO/relocatable.zig
@@ -97,6 +97,10 @@ pub fn flushStaticLib(macho_file: *MachO, comp: *Compilation, module_obj_path: ?
         try positionals.append(try link.openObjectInput(diags, comp.compiler_rt_obj.?.full_object_path));
     }
 
+    if (comp.include_ubsan_rt) {
+        try positionals.append(try link.openObjectInput(diags, comp.ubsan_rt_obj.?.full_object_path));
+    }
+
     for (positionals.items) |link_input| {
         macho_file.classifyInputFile(link_input) catch |err|
             diags.addParseError(link_input.path().?, "failed to read input file: {s}", .{@errorName(err)});
src/link/Coff.zig
@@ -2162,6 +2162,15 @@ fn linkWithLLD(coff: *Coff, arena: Allocator, tid: Zcu.PerThread.Id, prog_node:
             try argv.append(try comp.fuzzer_lib.?.full_object_path.toString(arena));
         }
 
+        const ubsan_rt_path: ?Path = blk: {
+            if (comp.ubsan_rt_lib) |x| break :blk x.full_object_path;
+            if (comp.ubsan_rt_obj) |x| break :blk x.full_object_path;
+            break :blk null;
+        };
+        if (ubsan_rt_path) |path| {
+            try argv.append(try path.toString(arena));
+        }
+
         if (is_exe_or_dyn_lib and !comp.skip_linker_dependencies) {
             if (!comp.config.link_libc) {
                 if (comp.libc_static_lib) |lib| {
src/link/Elf.zig
@@ -1541,6 +1541,11 @@ fn linkWithLLD(self: *Elf, arena: Allocator, tid: Zcu.PerThread.Id, prog_node: s
         if (comp.compiler_rt_obj) |x| break :blk x.full_object_path;
         break :blk null;
     };
+    const ubsan_rt_path: ?Path = blk: {
+        if (comp.ubsan_rt_lib) |x| break :blk x.full_object_path;
+        if (comp.ubsan_rt_obj) |x| break :blk x.full_object_path;
+        break :blk null;
+    };
 
     // Here we want to determine whether we can save time by not invoking LLD when the
     // output is unchanged. None of the linker options or the object files that are being
@@ -1575,6 +1580,7 @@ fn linkWithLLD(self: *Elf, arena: Allocator, tid: Zcu.PerThread.Id, prog_node: s
         }
         try man.addOptionalFile(module_obj_path);
         try man.addOptionalFilePath(compiler_rt_path);
+        try man.addOptionalFilePath(ubsan_rt_path);
         try man.addOptionalFilePath(if (comp.tsan_lib) |l| l.full_object_path else null);
         try man.addOptionalFilePath(if (comp.fuzzer_lib) |l| l.full_object_path else null);
 
@@ -1974,6 +1980,10 @@ fn linkWithLLD(self: *Elf, arena: Allocator, tid: Zcu.PerThread.Id, prog_node: s
             try argv.append(try lib.full_object_path.toString(arena));
         }
 
+        if (ubsan_rt_path) |p| {
+            try argv.append(try p.toString(arena));
+        }
+
         // libc
         if (is_exe_or_dyn_lib and
             !comp.skip_linker_dependencies and
src/link/MachO.zig
@@ -344,11 +344,21 @@ pub fn deinit(self: *MachO) void {
     self.thunks.deinit(gpa);
 }
 
-pub fn flush(self: *MachO, arena: Allocator, tid: Zcu.PerThread.Id, prog_node: std.Progress.Node) link.File.FlushError!void {
+pub fn flush(
+    self: *MachO,
+    arena: Allocator,
+    tid: Zcu.PerThread.Id,
+    prog_node: std.Progress.Node,
+) link.File.FlushError!void {
     try self.flushModule(arena, tid, prog_node);
 }
 
-pub fn flushModule(self: *MachO, arena: Allocator, tid: Zcu.PerThread.Id, prog_node: std.Progress.Node) link.File.FlushError!void {
+pub fn flushModule(
+    self: *MachO,
+    arena: Allocator,
+    tid: Zcu.PerThread.Id,
+    prog_node: std.Progress.Node,
+) link.File.FlushError!void {
     const tracy = trace(@src());
     defer tracy.end();
 
@@ -409,6 +419,16 @@ pub fn flushModule(self: *MachO, arena: Allocator, tid: Zcu.PerThread.Id, prog_n
         try positionals.append(try link.openObjectInput(diags, comp.fuzzer_lib.?.full_object_path));
     }
 
+    // Feed the UBSan runtime to the linker: use the static archive when one was
+    // built, otherwise fall back to the single object file.
+    if (comp.ubsan_rt_lib) |crt_file| {
+        const path = crt_file.full_object_path;
+        self.classifyInputFile(try link.openArchiveInput(diags, path, false, false)) catch |err|
+            diags.addParseError(path, "failed to parse archive: {s}", .{@errorName(err)});
+    } else if (comp.ubsan_rt_obj) |crt_file| {
+        const path = crt_file.full_object_path;
+        // This input is opened as an object, so report it as one.
+        self.classifyInputFile(try link.openObjectInput(diags, path)) catch |err|
+            diags.addParseError(path, "failed to parse object: {s}", .{@errorName(err)});
+    }
+
     for (positionals.items) |link_input| {
         self.classifyInputFile(link_input) catch |err|
             diags.addParseError(link_input.path().?, "failed to read input file: {s}", .{@errorName(err)});
@@ -813,6 +833,8 @@ fn dumpArgv(self: *MachO, comp: *Compilation) !void {
 
         if (comp.compiler_rt_lib) |lib| try argv.append(try lib.full_object_path.toString(arena));
         if (comp.compiler_rt_obj) |obj| try argv.append(try obj.full_object_path.toString(arena));
+        if (comp.ubsan_rt_lib) |lib| try argv.append(try lib.full_object_path.toString(arena));
+        if (comp.ubsan_rt_obj) |obj| try argv.append(try obj.full_object_path.toString(arena));
     }
 
     Compilation.dump_argv(argv.items);
src/link/Wasm.zig
@@ -3879,6 +3879,11 @@ fn linkWithLLD(wasm: *Wasm, arena: Allocator, tid: Zcu.PerThread.Id, prog_node:
         if (comp.compiler_rt_obj) |obj| break :blk obj.full_object_path;
         break :blk null;
     };
+    const ubsan_rt_path: ?Path = blk: {
+        if (comp.ubsan_rt_lib) |lib| break :blk lib.full_object_path;
+        if (comp.ubsan_rt_obj) |obj| break :blk obj.full_object_path;
+        break :blk null;
+    };
 
     const id_symlink_basename = "lld.id";
 
@@ -3901,6 +3906,7 @@ fn linkWithLLD(wasm: *Wasm, arena: Allocator, tid: Zcu.PerThread.Id, prog_node:
         }
         try man.addOptionalFile(module_obj_path);
         try man.addOptionalFilePath(compiler_rt_path);
+        try man.addOptionalFilePath(ubsan_rt_path);
         man.hash.addOptionalBytes(wasm.entry_name.slice(wasm));
         man.hash.add(wasm.base.stack_size);
         man.hash.add(wasm.base.build_id);
@@ -4148,6 +4154,10 @@ fn linkWithLLD(wasm: *Wasm, arena: Allocator, tid: Zcu.PerThread.Id, prog_node:
             try argv.append(try p.toString(arena));
         }
 
+        // Pass the UBSan runtime (archive or object, whichever was built) to LLD.
+        if (ubsan_rt_path) |p| {
+            try argv.append(try p.toString(arena));
+        }
+
         if (comp.verbose_link) {
             // Skip over our own name so that the LLD linker name is the first argv item.
             Compilation.dump_argv(argv.items[1..]);
src/Compilation.zig
@@ -79,6 +79,7 @@ implib_emit: ?Path,
 docs_emit: ?Path,
 root_name: [:0]const u8,
 include_compiler_rt: bool,
+include_ubsan_rt: bool,
 /// Resolved into known paths, any GNU ld scripts already resolved.
 link_inputs: []const link.Input,
 /// Needed only for passing -F args to clang.
@@ -226,6 +227,12 @@ libunwind_static_lib: ?CrtFile = null,
 /// Populated when we build the TSAN library. A Job to build this is placed in the queue
 /// and resolved before calling linker.flush().
 tsan_lib: ?CrtFile = null,
+/// Populated when we build the UBSAN library. A Job to build this is placed in the queue
+/// and resolved before calling linker.flush().
+ubsan_rt_lib: ?CrtFile = null,
+/// Populated when we build the UBSAN object. A Job to build this is placed in the queue
+/// and resolved before calling linker.flush().
+ubsan_rt_obj: ?CrtFile = null,
 /// Populated when we build the libc static library. A Job to build this is placed in the queue
 /// and resolved before calling linker.flush().
 libc_static_lib: ?CrtFile = null,
@@ -283,6 +290,8 @@ digest: ?[Cache.bin_digest_len]u8 = null,
 const QueuedJobs = struct {
     compiler_rt_lib: bool = false,
     compiler_rt_obj: bool = false,
+    ubsan_rt_lib: bool = false,
+    ubsan_rt_obj: bool = false,
     fuzzer_lib: bool = false,
     update_builtin_zig: bool,
     musl_crt_file: [@typeInfo(musl.CrtFile).@"enum".fields.len]bool = @splat(false),
@@ -789,6 +798,7 @@ pub const MiscTask = enum {
     libcxx,
     libcxxabi,
     libtsan,
+    libubsan,
     libfuzzer,
     wasi_libc_crt_file,
     compiler_rt,
@@ -1064,6 +1074,7 @@ pub const CreateOptions = struct {
     /// Position Independent Executable. If the output mode is not an
     /// executable this field is ignored.
     want_compiler_rt: ?bool = null,
+    want_ubsan_rt: ?bool = null,
     want_lto: ?bool = null,
     function_sections: bool = false,
     data_sections: bool = false,
@@ -1297,6 +1308,9 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
         const include_compiler_rt = options.want_compiler_rt orelse
             (!options.skip_linker_dependencies and is_exe_or_dyn_lib);
 
+        const include_ubsan_rt = options.want_ubsan_rt orelse
+            (!options.skip_linker_dependencies and is_exe_or_dyn_lib);
+
         if (include_compiler_rt and output_mode == .Obj) {
             // For objects, this mechanism relies on essentially `_ = @import("compiler-rt");`
             // injected into the object.
@@ -1323,6 +1337,26 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
             try options.root_mod.deps.putNoClobber(arena, "compiler_rt", compiler_rt_mod);
         }
 
+        if (include_ubsan_rt and output_mode == .Obj) {
+            const ubsan_rt_mod = try Package.Module.create(arena, .{
+                .global_cache_directory = options.global_cache_directory,
+                .paths = .{
+                    .root = .{
+                        .root_dir = options.zig_lib_directory,
+                    },
+                    .root_src_path = "ubsan.zig",
+                },
+                .fully_qualified_name = "ubsan_rt",
+                .cc_argv = &.{},
+                .inherited = .{},
+                .global = options.config,
+                .parent = options.root_mod,
+                .builtin_mod = options.root_mod.getBuiltinDependency(),
+                .builtin_modules = null, // `builtin_mod` is set
+            });
+            try options.root_mod.deps.putNoClobber(arena, "ubsan_rt", ubsan_rt_mod);
+        }
+
         if (options.verbose_llvm_cpu_features) {
             if (options.root_mod.resolved_target.llvm_cpu_features) |cf| print: {
                 const target = options.root_mod.resolved_target.result;
@@ -1500,6 +1534,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
             .version = options.version,
             .libc_installation = libc_dirs.libc_installation,
             .include_compiler_rt = include_compiler_rt,
+            .include_ubsan_rt = include_ubsan_rt,
             .link_inputs = options.link_inputs,
             .framework_dirs = options.framework_dirs,
             .llvm_opt_bisect_limit = options.llvm_opt_bisect_limit,
@@ -1885,6 +1920,16 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
                 }
             }
 
+            // Queue the UBSan runtime build through `queued_jobs`, which is what
+            // `performAllTheWorkInner` checks before spawning `buildRt`.
+            // Executables and dynamic libraries get the static library; other
+            // non-object outputs get a single object. Nothing is queued here for
+            // `.Obj` output.
+            if (comp.include_ubsan_rt and capable_of_building_compiler_rt) {
+                if (is_exe_or_dyn_lib) {
+                    log.debug("queuing a job to build ubsan_rt_lib", .{});
+                    comp.queued_jobs.ubsan_rt_lib = true;
+                } else if (output_mode != .Obj) {
+                    log.debug("queuing a job to build ubsan_rt_obj", .{});
+                    comp.queued_jobs.ubsan_rt_obj = true;
+                }
+            }
+
             if (is_exe_or_dyn_lib and comp.config.any_fuzz and capable_of_building_compiler_rt) {
                 log.debug("queuing a job to build libfuzzer", .{});
                 comp.queued_jobs.fuzzer_lib = true;
@@ -1937,9 +1982,16 @@ pub fn destroy(comp: *Compilation) void {
     if (comp.compiler_rt_obj) |*crt_file| {
         crt_file.deinit(gpa);
     }
+    if (comp.ubsan_rt_lib) |*crt_file| {
+        crt_file.deinit(gpa);
+    }
+    if (comp.ubsan_rt_obj) |*crt_file| {
+        crt_file.deinit(gpa);
+    }
     if (comp.fuzzer_lib) |*crt_file| {
         crt_file.deinit(gpa);
     }
+
     if (comp.libc_static_lib) |*crt_file| {
         crt_file.deinit(gpa);
     }
@@ -2207,6 +2259,10 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void {
             _ = try pt.importPkg(zcu.main_mod);
         }
 
+        if (zcu.root_mod.deps.get("ubsan_rt")) |ubsan_rt_mod| {
+            _ = try pt.importPkg(ubsan_rt_mod);
+        }
+
         if (zcu.root_mod.deps.get("compiler_rt")) |compiler_rt_mod| {
             _ = try pt.importPkg(compiler_rt_mod);
         }
@@ -2248,6 +2304,11 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void {
             try comp.queueJob(.{ .analyze_mod = compiler_rt_mod });
             zcu.analysis_roots.appendAssumeCapacity(compiler_rt_mod);
         }
+
+        if (zcu.root_mod.deps.get("ubsan_rt")) |ubsan_rt_mod| {
+            try comp.queueJob(.{ .analyze_mod = ubsan_rt_mod });
+            zcu.analysis_roots.appendAssumeCapacity(ubsan_rt_mod);
+        }
     }
 
     try comp.performAllTheWork(main_progress_node);
@@ -2593,6 +2654,7 @@ fn addNonIncrementalStuffToCacheManifest(
     man.hash.add(comp.link_eh_frame_hdr);
     man.hash.add(comp.skip_linker_dependencies);
     man.hash.add(comp.include_compiler_rt);
+    man.hash.add(comp.include_ubsan_rt);
     man.hash.add(comp.rc_includes);
     man.hash.addListOfBytes(comp.force_undefined_symbols.keys());
     man.hash.addListOfBytes(comp.framework_dirs);
@@ -3683,6 +3745,14 @@ fn performAllTheWorkInner(
         comp.link_task_wait_group.spawnManager(buildRt, .{ comp, "fuzzer.zig", .libfuzzer, .Lib, true, &comp.fuzzer_lib, main_progress_node });
     }
 
+    // Build the UBSan runtime (library and/or object) if it was queued and has
+    // not been produced yet.
+    // NOTE(review): the sibling `buildRt` call for the fuzzer passes a bool between
+    // the output mode and the out-pointer; it was missing here. `false` is an
+    // assumed value — confirm it against `buildRt`'s signature.
+    if (comp.queued_jobs.ubsan_rt_lib and comp.ubsan_rt_lib == null) {
+        comp.link_task_wait_group.spawnManager(buildRt, .{ comp, "ubsan.zig", .libubsan, .Lib, false, &comp.ubsan_rt_lib, main_progress_node });
+    }
+
+    if (comp.queued_jobs.ubsan_rt_obj and comp.ubsan_rt_obj == null) {
+        comp.link_task_wait_group.spawnManager(buildRt, .{ comp, "ubsan.zig", .libubsan, .Obj, false, &comp.ubsan_rt_obj, main_progress_node });
+    }
+
     if (comp.queued_jobs.glibc_shared_objects) {
         comp.link_task_wait_group.spawnManager(buildGlibcSharedObjects, .{ comp, main_progress_node });
     }
@@ -5916,7 +5986,11 @@ pub fn addCCArgs(
                     // These args have to be added after the `-fsanitize` arg or
                     // they won't take effect.
                     if (mod.sanitize_c) {
+                        // This check requires implementing the Itanium C++ ABI.
+                        // We would make it `-fsanitize-trap=vptr`, however this check requires
+                        // a full runtime due to the type hashing involved.
                         try argv.append("-fno-sanitize=vptr");
+
                         // It is very common, and well-defined, for a pointer on one side of a C ABI
                         // to have a different but compatible element type. Examples include:
                         // `char*` vs `uint8_t*` on a system with 8-bit bytes
@@ -5926,6 +6000,8 @@ pub fn addCCArgs(
                         // function was called.
                         try argv.append("-fno-sanitize=function");
 
+                        // It's recommended to use the minimal runtime in production environments
+                        // due to the security implications of the full runtime.
                         if (mod.optimize_mode == .ReleaseSafe) {
                             try argv.append("-fsanitize-minimal-runtime");
                         }
src/link.zig
@@ -1107,6 +1107,11 @@ pub const File = struct {
         else
             null;
 
+        const ubsan_rt_path: ?Path = if (comp.include_ubsan_rt)
+            comp.ubsan_rt_obj.?.full_object_path
+        else
+            null;
+
         // This function follows the same pattern as link.Elf.linkWithLLD so if you want some
         // insight as to what's going on here you can read that function body which is more
         // well-commented.
@@ -1136,6 +1141,7 @@ pub const File = struct {
             }
             try man.addOptionalFile(zcu_obj_path);
             try man.addOptionalFilePath(compiler_rt_path);
+            try man.addOptionalFilePath(ubsan_rt_path);
 
             // We don't actually care whether it's a cache hit or miss; we just need the digest and the lock.
             _ = try man.hit();
@@ -1181,6 +1187,7 @@ pub const File = struct {
         }
         if (zcu_obj_path) |p| object_files.appendAssumeCapacity(try arena.dupeZ(u8, p));
         if (compiler_rt_path) |p| object_files.appendAssumeCapacity(try p.toStringZ(arena));
+        if (ubsan_rt_path) |p| object_files.appendAssumeCapacity(try p.toStringZ(arena));
 
         if (comp.verbose_link) {
             std.debug.print("ar rcs {s}", .{full_out_path_z});
src/main.zig
@@ -849,6 +849,7 @@ fn buildOutputType(
     var emit_h: Emit = .no;
     var soname: SOName = undefined;
     var want_compiler_rt: ?bool = null;
+    var want_ubsan_rt: ?bool = null;
     var linker_script: ?[]const u8 = null;
     var version_script: ?[]const u8 = null;
     var linker_repro: ?bool = null;
@@ -1376,6 +1377,10 @@ fn buildOutputType(
                         want_compiler_rt = true;
                     } else if (mem.eql(u8, arg, "-fno-compiler-rt")) {
                         want_compiler_rt = false;
+                    } else if (mem.eql(u8, arg, "-fubsan-rt")) {
+                        want_ubsan_rt = true;
+                    } else if (mem.eql(u8, arg, "-fno-ubsan-rt")) {
+                        want_ubsan_rt = false;
                     } else if (mem.eql(u8, arg, "-feach-lib-rpath")) {
                         create_module.each_lib_rpath = true;
                     } else if (mem.eql(u8, arg, "-fno-each-lib-rpath")) {
@@ -3504,6 +3509,7 @@ fn buildOutputType(
         .windows_lib_names = create_module.windows_libs.keys(),
         .wasi_emulated_libs = create_module.wasi_emulated_libs.items,
         .want_compiler_rt = want_compiler_rt,
+        .want_ubsan_rt = want_ubsan_rt,
         .hash_style = hash_style,
         .linker_script = linker_script,
         .version_script = version_script,