Commit 54b7e144b1

Andrew Kelley <andrew@ziglang.org>
2024-07-22 03:12:22
initial support for integrated fuzzing
* Add the `-ffuzz` and `-fno-fuzz` CLI arguments.
* Detect fuzz testing flags from zig cc.
* Set the correct clang flags when fuzz testing is requested. It can be combined with TSAN and UBSAN.
* Compilation: build the fuzzer library when needed; it is currently an empty zig file.
* Add optforfuzzing to every function in the llvm backend for modules that have requested fuzzing.
* In ZigLLVMTargetMachineEmitToFile, add the optimization passes for sanitizer coverage.
* std.mem.eql uses a naive implementation optimized for fuzzing when builtin.fuzz is true.

Tracked by #20702
1 parent eac7fd4
lib/std/mem.zig
@@ -636,18 +636,20 @@ test lessThan {
     try testing.expect(lessThan(u8, "", "a"));
 }
 
-const backend_can_use_eql_bytes = switch (builtin.zig_backend) {
+const eqlBytes_allowed = switch (builtin.zig_backend) {
     // The SPIR-V backend does not support the optimized path yet.
     .stage2_spirv64 => false,
     // The RISC-V backend does not support vectors.
     .stage2_riscv64 => false,
-    else => true,
+    // The naive memory comparison implementation is more useful for fuzzers to
+    // find interesting inputs.
+    else => !builtin.fuzz,
 };
 
 /// Compares two slices and returns whether they are equal.
 pub fn eql(comptime T: type, a: []const T, b: []const T) bool {
     if (@sizeOf(T) == 0) return true;
-    if (!@inComptime() and std.meta.hasUniqueRepresentation(T) and backend_can_use_eql_bytes) return eqlBytes(sliceAsBytes(a), sliceAsBytes(b));
+    if (!@inComptime() and std.meta.hasUniqueRepresentation(T) and eqlBytes_allowed) return eqlBytes(sliceAsBytes(a), sliceAsBytes(b));
 
     if (a.len != b.len) return false;
     if (a.len == 0 or a.ptr == b.ptr) return true;
@@ -660,9 +662,7 @@ pub fn eql(comptime T: type, a: []const T, b: []const T) bool {
 
 /// std.mem.eql heavily optimized for slices of bytes.
 fn eqlBytes(a: []const u8, b: []const u8) bool {
-    if (!backend_can_use_eql_bytes) {
-        return eql(u8, a, b);
-    }
+    comptime assert(eqlBytes_allowed);
 
     if (a.len != b.len) return false;
     if (a.len == 0 or a.ptr == b.ptr) return true;
lib/fuzzer.zig
src/codegen/llvm/bindings.zig
@@ -93,6 +93,7 @@ pub const TargetMachine = opaque {
         is_small: bool,
         time_report: bool,
         tsan: bool,
+        sancov: bool,
         lto: bool,
         asm_filename: ?[*:0]const u8,
         bin_filename: ?[*:0]const u8,
src/codegen/llvm.zig
@@ -1101,6 +1101,7 @@ pub const Object = struct {
         is_small: bool,
         time_report: bool,
         sanitize_thread: bool,
+        fuzz: bool,
         lto: bool,
     };
 
@@ -1287,6 +1288,7 @@ pub const Object = struct {
                 options.is_small,
                 options.time_report,
                 options.sanitize_thread,
+                options.fuzz,
                 options.lto,
                 null,
                 emit_bin_path,
@@ -1311,6 +1313,7 @@ pub const Object = struct {
             options.is_small,
             options.time_report,
             options.sanitize_thread,
+            options.fuzz,
             options.lto,
             options.asm_path,
             emit_bin_path,
@@ -2982,6 +2985,9 @@ pub const Object = struct {
         if (owner_mod.sanitize_thread) {
             try attributes.addFnAttr(.sanitize_thread, &o.builder);
         }
+        if (owner_mod.fuzz) {
+            try attributes.addFnAttr(.optforfuzzing, &o.builder);
+        }
         const target = owner_mod.resolved_target.result;
         if (target.cpu.model.llvm_name) |s| {
             try attributes.addFnAttr(.{ .string = .{
src/Compilation/Config.zig
@@ -32,6 +32,7 @@ any_non_single_threaded: bool,
 /// per-Module setting.
 any_error_tracing: bool,
 any_sanitize_thread: bool,
+any_fuzz: bool,
 pie: bool,
 /// If this is true then linker code is responsible for making an LLVM IR
 /// Module, outputting it to an object file, and then linking that together
@@ -82,6 +83,7 @@ pub const Options = struct {
     ensure_libcpp_on_non_freestanding: bool = false,
     any_non_single_threaded: bool = false,
     any_sanitize_thread: bool = false,
+    any_fuzz: bool = false,
     any_unwind_tables: bool = false,
     any_dyn_libs: bool = false,
     any_c_source_files: bool = false,
@@ -486,6 +488,7 @@ pub fn resolve(options: Options) ResolveError!Config {
         .any_non_single_threaded = options.any_non_single_threaded,
         .any_error_tracing = any_error_tracing,
         .any_sanitize_thread = options.any_sanitize_thread,
+        .any_fuzz = options.any_fuzz,
         .root_error_tracing = root_error_tracing,
         .pie = pie,
         .lto = lto,
src/Package/Module.zig
@@ -26,6 +26,7 @@ stack_protector: u32,
 red_zone: bool,
 sanitize_c: bool,
 sanitize_thread: bool,
+fuzz: bool,
 unwind_tables: bool,
 cc_argv: []const []const u8,
 /// (SPIR-V) whether to generate a structured control flow graph or not
@@ -92,6 +93,7 @@ pub const CreateOptions = struct {
         unwind_tables: ?bool = null,
         sanitize_c: ?bool = null,
         sanitize_thread: ?bool = null,
+        fuzz: ?bool = null,
         structured_cfg: ?bool = null,
     };
 };
@@ -106,6 +108,7 @@ pub const ResolvedTarget = struct {
 /// At least one of `parent` and `resolved_target` must be non-null.
 pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module {
     if (options.inherited.sanitize_thread == true) assert(options.global.any_sanitize_thread);
+    if (options.inherited.fuzz == true) assert(options.global.any_fuzz);
     if (options.inherited.single_threaded == false) assert(options.global.any_non_single_threaded);
     if (options.inherited.unwind_tables == true) assert(options.global.any_unwind_tables);
     if (options.inherited.error_tracing == true) assert(options.global.any_error_tracing);
@@ -210,6 +213,12 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module {
         break :b false;
     };
 
+    const fuzz = b: {
+        if (options.inherited.fuzz) |x| break :b x;
+        if (options.parent) |p| break :b p.fuzz;
+        break :b false;
+    };
+
     const code_model = b: {
         if (options.inherited.code_model) |x| break :b x;
         if (options.parent) |p| break :b p.code_model;
@@ -337,6 +346,7 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module {
         .red_zone = red_zone,
         .sanitize_c = sanitize_c,
         .sanitize_thread = sanitize_thread,
+        .fuzz = fuzz,
         .unwind_tables = unwind_tables,
         .cc_argv = options.cc_argv,
         .structured_cfg = structured_cfg,
@@ -359,6 +369,7 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module {
             .error_tracing = error_tracing,
             .valgrind = valgrind,
             .sanitize_thread = sanitize_thread,
+            .fuzz = fuzz,
             .pic = pic,
             .pie = options.global.pie,
             .strip = strip,
@@ -427,6 +438,7 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module {
             .red_zone = red_zone,
             .sanitize_c = sanitize_c,
             .sanitize_thread = sanitize_thread,
+            .fuzz = fuzz,
             .unwind_tables = unwind_tables,
             .cc_argv = &.{},
             .structured_cfg = structured_cfg,
@@ -485,6 +497,7 @@ pub fn createLimited(gpa: Allocator, options: LimitedOptions) Allocator.Error!*P
         .red_zone = undefined,
         .sanitize_c = undefined,
         .sanitize_thread = undefined,
+        .fuzz = undefined,
         .unwind_tables = undefined,
         .cc_argv = undefined,
         .structured_cfg = undefined,
src/Builtin.zig
@@ -10,6 +10,7 @@ optimize_mode: std.builtin.OptimizeMode,
 error_tracing: bool,
 valgrind: bool,
 sanitize_thread: bool,
+fuzz: bool,
 pic: bool,
 pie: bool,
 strip: bool,
@@ -185,6 +186,7 @@ pub fn append(opts: @This(), buffer: *std.ArrayList(u8)) Allocator.Error!void {
         \\pub const have_error_return_tracing = {};
         \\pub const valgrind_support = {};
         \\pub const sanitize_thread = {};
+        \\pub const fuzz = {};
         \\pub const position_independent_code = {};
         \\pub const position_independent_executable = {};
         \\pub const strip_debug_info = {};
@@ -199,6 +201,7 @@ pub fn append(opts: @This(), buffer: *std.ArrayList(u8)) Allocator.Error!void {
         opts.error_tracing,
         opts.valgrind,
         opts.sanitize_thread,
+        opts.fuzz,
         opts.pic,
         opts.pie,
         opts.strip,
src/Compilation.zig
@@ -190,6 +190,7 @@ debug_compile_errors: bool,
 incremental: bool,
 job_queued_compiler_rt_lib: bool = false,
 job_queued_compiler_rt_obj: bool = false,
+job_queued_fuzzer_lib: bool = false,
 job_queued_update_builtin_zig: bool,
 alloc_failure_occurred: bool = false,
 formatted_panics: bool = false,
@@ -231,6 +232,10 @@ compiler_rt_lib: ?CRTFile = null,
 /// Populated when we build the compiler_rt_obj object. A Job to build this is indicated
 /// by setting `job_queued_compiler_rt_obj` and resolved before calling linker.flush().
 compiler_rt_obj: ?CRTFile = null,
+/// Populated when we build the libfuzzer static library. A Job to build this
+/// is indicated by setting `job_queued_fuzzer_lib` and resolved before
+/// calling linker.flush().
+fuzzer_lib: ?CRTFile = null,
 
 glibc_so_files: ?glibc.BuiltSharedObjects = null,
 wasi_emulated_libs: []const wasi_libc.CRTFile,
@@ -799,6 +804,7 @@ pub const MiscTask = enum {
     libcxx,
     libcxxabi,
     libtsan,
+    libfuzzer,
     wasi_libc_crt_file,
     compiler_rt,
     zig_libc,
@@ -887,6 +893,7 @@ pub const cache_helpers = struct {
         hh.add(mod.red_zone);
         hh.add(mod.sanitize_c);
         hh.add(mod.sanitize_thread);
+        hh.add(mod.fuzz);
         hh.add(mod.unwind_tables);
         hh.add(mod.structured_cfg);
         hh.addListOfBytes(mod.cc_argv);
@@ -1302,6 +1309,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
         const any_unwind_tables = options.config.any_unwind_tables or options.root_mod.unwind_tables;
         const any_non_single_threaded = options.config.any_non_single_threaded or !options.root_mod.single_threaded;
         const any_sanitize_thread = options.config.any_sanitize_thread or options.root_mod.sanitize_thread;
+        const any_fuzz = options.config.any_fuzz or options.root_mod.fuzz;
 
         const link_eh_frame_hdr = options.link_eh_frame_hdr or any_unwind_tables;
         const build_id = options.build_id orelse .none;
@@ -1563,6 +1571,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
         comp.config.any_unwind_tables = any_unwind_tables;
         comp.config.any_non_single_threaded = any_non_single_threaded;
         comp.config.any_sanitize_thread = any_sanitize_thread;
+        comp.config.any_fuzz = any_fuzz;
 
         const lf_open_opts: link.File.OpenOptions = .{
             .linker_script = options.linker_script,
@@ -1908,6 +1917,13 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
             }
         }
 
+        if (comp.config.any_fuzz and capable_of_building_compiler_rt) {
+            if (is_exe_or_dyn_lib) {
+                log.debug("queuing a job to build libfuzzer", .{});
+                comp.job_queued_fuzzer_lib = true;
+            }
+        }
+
         if (!comp.skip_linker_dependencies and is_exe_or_dyn_lib and
             !comp.config.link_libc and capable_of_building_zig_libc)
         {
@@ -1956,6 +1972,9 @@ pub fn destroy(comp: *Compilation) void {
     if (comp.compiler_rt_obj) |*crt_file| {
         crt_file.deinit(gpa);
     }
+    if (comp.fuzzer_lib) |*crt_file| {
+        crt_file.deinit(gpa);
+    }
     if (comp.libc_static_lib) |*crt_file| {
         crt_file.deinit(gpa);
     }
@@ -2721,6 +2740,7 @@ pub fn emitLlvmObject(
         .is_small = comp.root_mod.optimize_mode == .ReleaseSmall,
         .time_report = comp.time_report,
         .sanitize_thread = comp.config.any_sanitize_thread,
+        .fuzz = comp.config.any_fuzz,
         .lto = comp.config.lto,
     });
 }
@@ -3641,15 +3661,9 @@ fn performAllTheWorkInner(
         break;
     }
 
-    if (comp.job_queued_compiler_rt_lib) {
-        comp.job_queued_compiler_rt_lib = false;
-        buildCompilerRtOneShot(comp, .Lib, &comp.compiler_rt_lib, main_progress_node);
-    }
-
-    if (comp.job_queued_compiler_rt_obj) {
-        comp.job_queued_compiler_rt_obj = false;
-        buildCompilerRtOneShot(comp, .Obj, &comp.compiler_rt_obj, main_progress_node);
-    }
+    buildCompilerRtOneShot(comp, &comp.job_queued_compiler_rt_lib, "compiler_rt.zig", .compiler_rt, .Lib, &comp.compiler_rt_lib, main_progress_node);
+    buildCompilerRtOneShot(comp, &comp.job_queued_compiler_rt_obj, "compiler_rt.zig", .compiler_rt, .Obj, &comp.compiler_rt_obj, main_progress_node);
+    buildCompilerRtOneShot(comp, &comp.job_queued_fuzzer_lib, "fuzzer.zig", .libfuzzer, .Lib, &comp.fuzzer_lib, main_progress_node);
 }
 
 const JobError = Allocator.Error;
@@ -4655,23 +4669,27 @@ fn workerUpdateWin32Resource(
 
 fn buildCompilerRtOneShot(
     comp: *Compilation,
+    job_queued: *bool,
+    root_source_name: []const u8,
+    misc_task: MiscTask,
     output_mode: std.builtin.OutputMode,
     out: *?CRTFile,
     prog_node: std.Progress.Node,
 ) void {
+    if (!job_queued.*) return;
+    job_queued.* = false;
+
     comp.buildOutputFromZig(
-        "compiler_rt.zig",
+        root_source_name,
         output_mode,
         out,
-        .compiler_rt,
+        misc_task,
         prog_node,
     ) catch |err| switch (err) {
         error.SubCompilationFailed => return, // error reported already
-        else => comp.lockAndSetMiscFailure(
-            .compiler_rt,
-            "unable to build compiler_rt: {s}",
-            .{@errorName(err)},
-        ),
+        else => comp.lockAndSetMiscFailure(misc_task, "unable to build {s}: {s}", .{
+            @tagName(misc_task), @errorName(err),
+        }),
     };
 }
 
@@ -5602,23 +5620,32 @@ pub fn addCCArgs(
                 try argv.append("-mthumb");
             }
 
-            if (mod.sanitize_c and !mod.sanitize_thread) {
-                try argv.append("-fsanitize=undefined");
-                try argv.append("-fsanitize-trap=undefined");
-                // It is very common, and well-defined, for a pointer on one side of a C ABI
-                // to have a different but compatible element type. Examples include:
-                // `char*` vs `uint8_t*` on a system with 8-bit bytes
-                // `const char*` vs `char*`
-                // `char*` vs `unsigned char*`
-                // Without this flag, Clang would invoke UBSAN when such an extern
-                // function was called.
-                try argv.append("-fno-sanitize=function");
-            } else if (mod.sanitize_c and mod.sanitize_thread) {
-                try argv.append("-fsanitize=undefined,thread");
-                try argv.append("-fsanitize-trap=undefined");
-                try argv.append("-fno-sanitize=function");
-            } else if (!mod.sanitize_c and mod.sanitize_thread) {
-                try argv.append("-fsanitize=thread");
+            {
+                var san_arg: std.ArrayListUnmanaged(u8) = .{};
+                const prefix = "-fsanitize=";
+                if (mod.sanitize_c) {
+                    if (san_arg.items.len == 0) try san_arg.appendSlice(arena, prefix);
+                    try san_arg.appendSlice(arena, "undefined,");
+                    try argv.append("-fsanitize-trap=undefined");
+                    // It is very common, and well-defined, for a pointer on one side of a C ABI
+                    // to have a different but compatible element type. Examples include:
+                    // `char*` vs `uint8_t*` on a system with 8-bit bytes
+                    // `const char*` vs `char*`
+                    // `char*` vs `unsigned char*`
+                    // Without this flag, Clang would invoke UBSAN when such an extern
+                    // function was called.
+                    try argv.append("-fno-sanitize=function");
+                }
+                if (mod.sanitize_thread) {
+                    if (san_arg.items.len == 0) try san_arg.appendSlice(arena, prefix);
+                    try san_arg.appendSlice(arena, "thread,");
+                }
+                if (mod.fuzz) {
+                    if (san_arg.items.len == 0) try san_arg.appendSlice(arena, prefix);
+                    try san_arg.appendSlice(arena, "fuzzer-no-link,");
+                }
+                // Chop off the trailing comma and append to argv.
+                if (san_arg.popOrNull()) |_| try argv.append(san_arg.items);
             }
 
             if (mod.red_zone) {
src/main.zig
@@ -499,12 +499,14 @@ const usage_build_generic =
     \\  -fno-stack-check          Disable stack probing in safe builds
     \\  -fstack-protector         Enable stack protection in unsafe builds
     \\  -fno-stack-protector      Disable stack protection in safe builds
-    \\  -fsanitize-c              Enable C undefined behavior detection in unsafe builds
-    \\  -fno-sanitize-c           Disable C undefined behavior detection in safe builds
     \\  -fvalgrind                Include valgrind client requests in release builds
     \\  -fno-valgrind             Omit valgrind client requests in debug builds
+    \\  -fsanitize-c              Enable C undefined behavior detection in unsafe builds
+    \\  -fno-sanitize-c           Disable C undefined behavior detection in safe builds
     \\  -fsanitize-thread         Enable Thread Sanitizer
     \\  -fno-sanitize-thread      Disable Thread Sanitizer
+    \\  -ffuzz                    Enable fuzz testing instrumentation
+    \\  -fno-fuzz                 Disable fuzz testing instrumentation
     \\  -funwind-tables           Always produce unwind table entries for all functions
     \\  -fno-unwind-tables        Never produce unwind table entries
     \\  -ferror-tracing           Enable error tracing in ReleaseFast mode
@@ -1429,6 +1431,10 @@ fn buildOutputType(
                         mod_opts.sanitize_thread = true;
                     } else if (mem.eql(u8, arg, "-fno-sanitize-thread")) {
                         mod_opts.sanitize_thread = false;
+                    } else if (mem.eql(u8, arg, "-ffuzz")) {
+                        mod_opts.fuzz = true;
+                    } else if (mem.eql(u8, arg, "-fno-fuzz")) {
+                        mod_opts.fuzz = false;
                     } else if (mem.eql(u8, arg, "-fllvm")) {
                         create_module.opts.use_llvm = true;
                     } else if (mem.eql(u8, arg, "-fno-llvm")) {
@@ -2060,11 +2066,21 @@ fn buildOutputType(
                         create_module.opts.debug_format = .{ .dwarf = .@"64" };
                     },
                     .sanitize => {
-                        if (mem.eql(u8, it.only_arg, "undefined")) {
-                            mod_opts.sanitize_c = true;
-                        } else if (mem.eql(u8, it.only_arg, "thread")) {
-                            mod_opts.sanitize_thread = true;
-                        } else {
+                        var san_it = std.mem.splitScalar(u8, it.only_arg, ',');
+                        var recognized_any = false;
+                        while (san_it.next()) |sub_arg| {
+                            if (mem.eql(u8, sub_arg, "undefined")) {
+                                mod_opts.sanitize_c = true;
+                                recognized_any = true;
+                            } else if (mem.eql(u8, sub_arg, "thread")) {
+                                mod_opts.sanitize_thread = true;
+                                recognized_any = true;
+                            } else if (mem.eql(u8, sub_arg, "fuzzer") or mem.eql(u8, sub_arg, "fuzzer-no-link")) {
+                                mod_opts.fuzz = true;
+                                recognized_any = true;
+                            }
+                        }
+                        if (!recognized_any) {
                             try cc_argv.appendSlice(arena, it.other_args);
                         }
                     },
@@ -2642,6 +2658,8 @@ fn buildOutputType(
             create_module.opts.any_non_single_threaded = true;
         if (mod_opts.sanitize_thread == true)
             create_module.opts.any_sanitize_thread = true;
+        if (mod_opts.fuzz == true)
+            create_module.opts.any_fuzz = true;
         if (mod_opts.unwind_tables == true)
             create_module.opts.any_unwind_tables = true;
         if (mod_opts.strip == false)
@@ -7491,6 +7509,8 @@ fn handleModArg(
         create_module.opts.any_non_single_threaded = true;
     if (mod_opts.sanitize_thread == true)
         create_module.opts.any_sanitize_thread = true;
+    if (mod_opts.fuzz == true)
+        create_module.opts.any_fuzz = true;
     if (mod_opts.unwind_tables == true)
         create_module.opts.any_unwind_tables = true;
     if (mod_opts.strip == false)
src/zig_llvm.cpp
@@ -54,6 +54,7 @@
 #include <llvm/Transforms/IPO.h>
 #include <llvm/Transforms/IPO/AlwaysInliner.h>
 #include <llvm/Transforms/Instrumentation/ThreadSanitizer.h>
+#include <llvm/Transforms/Instrumentation/SanitizerCoverage.h>
 #include <llvm/Transforms/Scalar.h>
 #include <llvm/Transforms/Utils.h>
 #include <llvm/Transforms/Utils/AddDiscriminators.h>
@@ -188,9 +189,10 @@ struct TimeTracerRAII {
 };
 } // end anonymous namespace
 
+
 bool ZigLLVMTargetMachineEmitToFile(LLVMTargetMachineRef targ_machine_ref, LLVMModuleRef module_ref,
         char **error_message, bool is_debug,
-        bool is_small, bool time_report, bool tsan, bool lto,
+        bool is_small, bool time_report, bool tsan, bool sancov, bool lto,
         const char *asm_filename, const char *bin_filename,
         const char *llvm_ir_filename, const char *bitcode_filename)
 {
@@ -303,13 +305,18 @@ bool ZigLLVMTargetMachineEmitToFile(LLVMTargetMachineRef targ_machine_ref, LLVMM
         });
     }
 
-    // Thread sanitizer
-    if (tsan) {
-        pass_builder.registerOptimizerLastEPCallback([](ModulePassManager &module_pm, OptimizationLevel level) {
+    pass_builder.registerOptimizerLastEPCallback([&](ModulePassManager &module_pm, OptimizationLevel level) {
+        // Code coverage instrumentation.
+        if (sancov) {
+            module_pm.addPass(SanitizerCoveragePass());
+        }
+
+        // Thread sanitizer
+        if (tsan) {
             module_pm.addPass(ModuleThreadSanitizerPass());
             module_pm.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass()));
-        });
-    }
+        }
+    });
 
     ModulePassManager module_pm;
     OptimizationLevel opt_level;
src/zig_llvm.h
@@ -26,7 +26,7 @@
 
 ZIG_EXTERN_C bool ZigLLVMTargetMachineEmitToFile(LLVMTargetMachineRef targ_machine_ref, LLVMModuleRef module_ref,
         char **error_message, bool is_debug,
-        bool is_small, bool time_report, bool tsan, bool lto,
+        bool is_small, bool time_report, bool tsan, bool sancov, bool lto,
         const char *asm_filename, const char *bin_filename,
         const char *llvm_ir_filename, const char *bitcode_filename);