Commit 97643c1ecc

Andrew Kelley <andrew@ziglang.org>
2024-07-30 06:44:38
fuzzer: track code coverage from all runs
When a unique run is encountered, track it in a bit set memory-mapped into the fuzz directory so it can be observed by other processes, even while the fuzzer is running.
1 parent a60810b
Changed files (2)
lib/compiler/test_runner.zig
@@ -41,6 +41,7 @@ pub fn main() void {
     }
 
     fba.reset();
+    if (builtin.fuzz) fuzzer_init();
 
     if (listen) {
         return mainServer() catch @panic("internal test runner failure");
@@ -323,6 +324,7 @@ const FuzzerSlice = extern struct {
 var is_fuzz_test: bool = undefined;
 
 extern fn fuzzer_next() FuzzerSlice;
+extern fn fuzzer_init() void;
 
 pub fn fuzzInput(options: testing.FuzzInputOptions) []const u8 {
     @disableInstrumentation();
lib/fuzzer.zig
@@ -2,6 +2,7 @@ const builtin = @import("builtin");
 const std = @import("std");
 const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
+const fatal = std.process.fatal;
 
 pub const std_options = .{
     .logFn = logOverride,
@@ -17,7 +18,7 @@ fn logOverride(
 ) void {
     if (builtin.mode != .Debug) return;
     const f = if (log_file) |f| f else f: {
-        const f = std.fs.cwd().createFile("libfuzzer.log", .{}) catch @panic("failed to open fuzzer log file");
+        const f = fuzzer.dir.createFile("libfuzzer.log", .{}) catch @panic("failed to open fuzzer log file");
         log_file = f;
         break :f f;
     };
@@ -28,16 +29,17 @@ fn logOverride(
 
 export threadlocal var __sancov_lowest_stack: usize = 0;
 
-export fn __sanitizer_cov_8bit_counters_init(start: [*]u8, stop: [*]u8) void {
-    std.log.debug("__sanitizer_cov_8bit_counters_init start={*}, stop={*}", .{ start, stop });
+var module_count_8bc: usize = 0;
+var module_count_pcs: usize = 0;
+
+export fn __sanitizer_cov_8bit_counters_init(start: [*]u8, end: [*]u8) void {
+    assert(@atomicRmw(usize, &module_count_8bc, .Add, 1, .monotonic) == 0);
+    fuzzer.pc_counters = start[0 .. end - start];
 }
 
-export fn __sanitizer_cov_pcs_init(pc_start: [*]const usize, pc_end: [*]const usize) void {
-    std.log.debug("__sanitizer_cov_pcs_init pc_start={*}, pc_end={*}", .{ pc_start, pc_end });
-    fuzzer.pc_range = .{
-        .start = @intFromPtr(pc_start),
-        .end = @intFromPtr(pc_start),
-    };
+export fn __sanitizer_cov_pcs_init(start: [*]const Fuzzer.FlaggedPc, end: [*]const Fuzzer.FlaggedPc) void {
+    assert(@atomicRmw(usize, &module_count_pcs, .Add, 1, .monotonic) == 0);
+    fuzzer.flagged_pcs = start[0 .. end - start];
 }
 
 export fn __sanitizer_cov_trace_const_cmp1(arg1: u8, arg2: u8) void {
@@ -102,11 +104,25 @@ const Fuzzer = struct {
     gpa: Allocator,
     rng: std.Random.DefaultPrng,
     input: std.ArrayListUnmanaged(u8),
-    pc_range: PcRange,
-    count: usize,
+    flagged_pcs: []const FlaggedPc,
+    pc_counters: []u8,
+    n_runs: usize,
     recent_cases: RunMap,
     deduplicated_runs: usize,
+    /// Data collected from code coverage instrumentation from one execution of
+    /// the test function.
     coverage: Coverage,
+    /// Tracks which PCs have been seen across all runs that do not crash the fuzzer process.
+    /// Stored in a memory-mapped file so that it can be shared with other
+    /// processes and viewed while the fuzzer is running.
+    seen_pcs: MemoryMappedList,
+    dir: std.fs.Dir,
+
+    const SeenPcsHeader = extern struct {
+        n_runs: usize,
+        pcs_len: usize,
+        lowest_stack: usize,
+    };
 
     const RunMap = std.ArrayHashMapUnmanaged(Run, void, Run.HashContext, false);
 
@@ -161,9 +177,12 @@ const Fuzzer = struct {
         }
     };
 
-    const PcRange = struct {
-        start: usize,
-        end: usize,
+    const FlaggedPc = extern struct {
+        addr: usize,
+        flags: packed struct(usize) {
+            entry: bool,
+            _: @Type(.{ .Int = .{ .signedness = .unsigned, .bits = @bitSizeOf(usize) - 1 } }),
+        },
     };
 
     const Analysis = struct {
@@ -171,6 +190,56 @@ const Fuzzer = struct {
         id: Run.Id,
     };
 
+    fn init(f: *Fuzzer, dir: std.fs.Dir) !void {
+        f.dir = dir;
+
+        // Layout of this file:
+        // - Header
+        // - list of PC addresses (usize elements)
+        // - list of hit flag, 1 bit per address (stored in u8 elements)
+        const coverage_file = dir.createFile("coverage", .{
+            .read = true,
+            .truncate = false,
+        }) catch |err| fatal("unable to create coverage file: {s}", .{@errorName(err)});
+        const flagged_pcs = f.flagged_pcs;
+        const n_bitset_elems = (flagged_pcs.len + 7) / 8;
+        const bytes_len = @sizeOf(SeenPcsHeader) + flagged_pcs.len * @sizeOf(usize) + n_bitset_elems;
+        const existing_len = coverage_file.getEndPos() catch |err| {
+            fatal("unable to check len of coverage file: {s}", .{@errorName(err)});
+        };
+        if (existing_len == 0) {
+            coverage_file.setEndPos(bytes_len) catch |err| {
+                fatal("unable to set len of coverage file: {s}", .{@errorName(err)});
+            };
+        } else if (existing_len != bytes_len) {
+            fatal("incompatible existing coverage file (differing lengths)", .{});
+        }
+        f.seen_pcs = MemoryMappedList.init(coverage_file, existing_len, bytes_len) catch |err| {
+            fatal("unable to init coverage memory map: {s}", .{@errorName(err)});
+        };
+        if (existing_len != 0) {
+            const existing_pcs = std.mem.bytesAsSlice(usize, f.seen_pcs.items[@sizeOf(SeenPcsHeader)..][0 .. flagged_pcs.len * @sizeOf(usize)]);
+            for (existing_pcs, flagged_pcs, 0..) |old, new, i| {
+                if (old != new.addr) {
+                    fatal("incompatible existing coverage file (differing PC at index {d}: {x} != {x})", .{
+                        i, old, new.addr,
+                    });
+                }
+            }
+        } else {
+            const header: SeenPcsHeader = .{
+                .n_runs = 0,
+                .pcs_len = flagged_pcs.len,
+                .lowest_stack = std.math.maxInt(usize),
+            };
+            f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&header));
+            for (flagged_pcs) |flagged_pc| {
+                f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&flagged_pc.addr));
+            }
+            f.seen_pcs.appendNTimesAssumeCapacity(0, n_bitset_elems);
+        }
+    }
+
     fn analyzeLastRun(f: *Fuzzer) Analysis {
         return .{
             .id = f.coverage.run_id_hasher.final(),
@@ -194,7 +263,7 @@ const Fuzzer = struct {
                 .score = 0,
             }, {});
         } else {
-            if (f.count % 1000 == 0) f.dumpStats();
+            if (f.n_runs % 1000 == 0) f.dumpStats();
 
             const analysis = f.analyzeLastRun();
             const gop = f.recent_cases.getOrPutAssumeCapacity(.{
@@ -217,6 +286,25 @@ const Fuzzer = struct {
                     .input = try gpa.dupe(u8, f.input.items),
                     .score = analysis.score,
                 };
+
+                // Track code coverage from all runs.
+                {
+                    const seen_pcs = f.seen_pcs.items[@sizeOf(SeenPcsHeader) + f.flagged_pcs.len * @sizeOf(usize) ..];
+                    for (seen_pcs, 0..) |*elem, i| {
+                        const byte_i = i / 8;
+                        const mask: u8 =
+                            (@as(u8, @intFromBool(f.pc_counters[byte_i + 0] != 0)) << 0) |
+                            (@as(u8, @intFromBool(f.pc_counters[byte_i + 1] != 0)) << 1) |
+                            (@as(u8, @intFromBool(f.pc_counters[byte_i + 2] != 0)) << 2) |
+                            (@as(u8, @intFromBool(f.pc_counters[byte_i + 3] != 0)) << 3) |
+                            (@as(u8, @intFromBool(f.pc_counters[byte_i + 4] != 0)) << 4) |
+                            (@as(u8, @intFromBool(f.pc_counters[byte_i + 5] != 0)) << 5) |
+                            (@as(u8, @intFromBool(f.pc_counters[byte_i + 6] != 0)) << 6) |
+                            (@as(u8, @intFromBool(f.pc_counters[byte_i + 7] != 0)) << 7);
+
+                        _ = @atomicRmw(u8, elem, .Or, mask, .monotonic);
+                    }
+                }
             }
 
             if (f.recent_cases.entries.len >= 100) {
@@ -244,8 +332,12 @@ const Fuzzer = struct {
         f.input.appendSliceAssumeCapacity(run.input);
         try f.mutate();
 
+        f.n_runs += 1;
+        const header: *volatile SeenPcsHeader = @ptrCast(f.seen_pcs.items[0..@sizeOf(SeenPcsHeader)]);
+        _ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic);
+        _ = @atomicRmw(usize, &header.lowest_stack, .Min, __sancov_lowest_stack, .monotonic);
+        @memset(f.pc_counters, 0);
         f.coverage.reset();
-        f.count += 1;
         return f.input.items;
     }
 
@@ -257,8 +349,7 @@ const Fuzzer = struct {
 
     fn dumpStats(f: *Fuzzer) void {
         std.log.info("stats: runs={d} deduplicated={d}", .{
-            f.count,
-            f.deduplicated_runs,
+            f.n_runs, f.deduplicated_runs,
         });
         for (f.recent_cases.keys()[0..@min(f.recent_cases.entries.len, 5)], 0..) |run, i| {
             std.log.info("best[{d}] id={x} score={d} input: '{}'", .{
@@ -303,11 +394,14 @@ var fuzzer: Fuzzer = .{
     .gpa = general_purpose_allocator.allocator(),
     .rng = std.Random.DefaultPrng.init(0),
     .input = .{},
-    .pc_range = .{ .start = 0, .end = 0 },
-    .count = 0,
+    .flagged_pcs = undefined,
+    .pc_counters = undefined,
+    .n_runs = 0,
     .deduplicated_runs = 0,
     .recent_cases = .{},
     .coverage = undefined,
+    .dir = undefined,
+    .seen_pcs = undefined,
 };
 
 export fn fuzzer_next() Fuzzer.Slice {
@@ -315,3 +409,64 @@ export fn fuzzer_next() Fuzzer.Slice {
         error.OutOfMemory => @panic("out of memory"),
     });
 }
+
+export fn fuzzer_init() void {
+    if (module_count_8bc == 0) fatal("__sanitizer_cov_8bit_counters_init was never called", .{});
+    if (module_count_pcs == 0) fatal("__sanitizer_cov_pcs_init was never called", .{});
+
+    // TODO: move this to .zig-cache/f
+    const fuzz_dir = std.fs.cwd().makeOpenPath("f", .{ .iterate = true }) catch |err| {
+        fatal("unable to open fuzz directory 'f': {s}", .{@errorName(err)});
+    };
+    fuzzer.init(fuzz_dir) catch |err| fatal("unable to init fuzzer: {s}", .{@errorName(err)});
+}
+
+/// Like `std.ArrayListUnmanaged(u8)` but backed by memory mapping.
+pub const MemoryMappedList = struct {
+    /// Contents of the list.
+    ///
+    /// Pointers to elements in this slice are invalidated by various functions
+    /// of this ArrayList in accordance with the respective documentation. In
+    /// all cases, "invalidated" means that the memory has been passed to this
+    /// allocator's resize or free function.
+    items: []align(std.mem.page_size) volatile u8,
+    /// How many bytes this list can hold without allocating additional memory.
+    capacity: usize,
+
+    pub fn init(file: std.fs.File, length: usize, capacity: usize) !MemoryMappedList {
+        const ptr = try std.posix.mmap(
+            null,
+            capacity,
+            std.posix.PROT.READ | std.posix.PROT.WRITE,
+            .{ .TYPE = .SHARED },
+            file.handle,
+            0,
+        );
+        return .{
+            .items = ptr[0..length],
+            .capacity = capacity,
+        };
+    }
+
+    /// Append the slice of items to the list.
+    /// Asserts that the list can hold the additional items.
+    pub fn appendSliceAssumeCapacity(l: *MemoryMappedList, items: []const u8) void {
+        const old_len = l.items.len;
+        const new_len = old_len + items.len;
+        assert(new_len <= l.capacity);
+        l.items.len = new_len;
+        @memcpy(l.items[old_len..][0..items.len], items);
+    }
+
+    /// Append a value to the list `n` times.
+    /// Never invalidates element pointers.
+    /// The function is inline so that a comptime-known `value` parameter will
+    /// have better memset codegen in case it has a repeated byte pattern.
+    /// Asserts that the list can hold the additional items.
+    pub inline fn appendNTimesAssumeCapacity(l: *MemoryMappedList, value: u8, n: usize) void {
+        const new_len = l.items.len + n;
+        assert(new_len <= l.capacity);
+        @memset(l.items.ptr[l.items.len..new_len], value);
+        l.items.len = new_len;
+    }
+};