Commit e66b269333
Changed files (7)
lib/compiler/test_runner.zig
@@ -4,6 +4,7 @@ const builtin = @import("builtin");
const std = @import("std");
const testing = std.testing;
const assert = std.debug.assert;
+const fuzz_abi = std.Build.abi.fuzz;
pub const std_options: std.Options = .{
.logFn = log,
@@ -57,7 +58,7 @@ pub fn main() void {
fba.reset();
if (builtin.fuzz) {
const cache_dir = opt_cache_dir orelse @panic("missing --cache-dir=[path] argument");
- fuzzer_init(FuzzerSlice.fromSlice(cache_dir));
+ fuzz_abi.fuzzer_init(.fromSlice(cache_dir));
}
if (listen) {
@@ -78,7 +79,7 @@ fn mainServer() !void {
});
if (builtin.fuzz) {
- const coverage_id = fuzzer_coverage_id();
+ const coverage_id = fuzz_abi.fuzzer_coverage_id();
try server.serveU64Message(.coverage_id, coverage_id);
}
@@ -152,14 +153,19 @@ fn mainServer() !void {
});
},
.start_fuzzing => {
+ // This ensures that the code below is not analyzed, and hence does not reference fuzzer
+ // symbols, since those symbols are not present in non-fuzz builds.
if (!builtin.fuzz) unreachable;
+
const index = try server.receiveBody_u32();
const test_fn = builtin.test_functions[index];
const entry_addr = @intFromPtr(test_fn.func);
+
try server.serveU64Message(.fuzz_start_addr, entry_addr);
defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1);
is_fuzz_test = false;
- fuzzer_set_name(test_fn.name.ptr, test_fn.name.len);
+ fuzz_test_index = index;
+
test_fn.func() catch |err| switch (err) {
error.SkipZigTest => return,
else => {
@@ -184,6 +190,8 @@ fn mainServer() !void {
fn mainTerminal() void {
@disableInstrumentation();
+ if (builtin.fuzz) @panic("fuzz test requires server");
+
const test_fn_list = builtin.test_functions;
var ok_count: usize = 0;
var skip_count: usize = 0;
@@ -333,28 +341,8 @@ pub fn mainSimple() anyerror!void {
if (failed != 0) std.process.exit(1);
}
-const FuzzerSlice = extern struct {
- ptr: [*]const u8,
- len: usize,
-
- /// Inline to avoid fuzzer instrumentation.
- inline fn toSlice(s: FuzzerSlice) []const u8 {
- return s.ptr[0..s.len];
- }
-
- /// Inline to avoid fuzzer instrumentation.
- inline fn fromSlice(s: []const u8) FuzzerSlice {
- return .{ .ptr = s.ptr, .len = s.len };
- }
-};
-
var is_fuzz_test: bool = undefined;
-
-extern fn fuzzer_set_name(name_ptr: [*]const u8, name_len: usize) void;
-extern fn fuzzer_init(cache_dir: FuzzerSlice) void;
-extern fn fuzzer_init_corpus_elem(input_ptr: [*]const u8, input_len: usize) void;
-extern fn fuzzer_start(testOne: *const fn ([*]const u8, usize) callconv(.c) void) void;
-extern fn fuzzer_coverage_id() u64;
+var fuzz_test_index: u32 = undefined;
pub fn fuzz(
context: anytype,
@@ -385,12 +373,12 @@ pub fn fuzz(
const global = struct {
var ctx: @TypeOf(context) = undefined;
- fn fuzzer_one(input_ptr: [*]const u8, input_len: usize) callconv(.c) void {
+ fn test_one(input: fuzz_abi.Slice) callconv(.c) void {
@disableInstrumentation();
testing.allocator_instance = .{};
defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1);
log_err_count = 0;
- testOne(ctx, input_ptr[0..input_len]) catch |err| switch (err) {
+ testOne(ctx, input.toSlice()) catch |err| switch (err) {
error.SkipZigTest => return,
else => {
std.debug.lockStdErr();
@@ -411,10 +399,11 @@ pub fn fuzz(
testing.allocator_instance = .{};
defer testing.allocator_instance = prev_allocator_state;
- for (options.corpus) |elem| fuzzer_init_corpus_elem(elem.ptr, elem.len);
-
global.ctx = context;
- fuzzer_start(&global.fuzzer_one);
+ fuzz_abi.fuzzer_init_test(&global.test_one, .fromSlice(builtin.test_functions[fuzz_test_index].name));
+ for (options.corpus) |elem|
+ fuzz_abi.fuzzer_new_input(.fromSlice(elem));
+ fuzz_abi.fuzzer_main();
return;
}
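Note: taken together, the test runner now drives fuzzing purely through the `std.Build.abi.fuzz` externs declared in lib/std/Build/abi.zig below. A minimal sketch of the expected call order, not part of the commit (the argument names are hypothetical stand-ins for values the runner already has):

const std = @import("std");
const fuzz_abi = std.Build.abi.fuzz;

fn runFuzzTest(
    cache_dir: []const u8,
    test_name: []const u8,
    corpus: []const []const u8,
    test_one: fuzz_abi.TestOne,
) void {
    // Once per process: locate the coverage sections and open the shared coverage file.
    fuzz_abi.fuzzer_init(.fromSlice(cache_dir));
    // Once per selected unit test: register the callback and the corpus namespace.
    fuzz_abi.fuzzer_init_test(test_one, .fromSlice(test_name));
    // Seed the corpus; zero-length entries are skipped by the fuzzer, which always adds an empty input itself.
    for (corpus) |elem| fuzz_abi.fuzzer_new_input(.fromSlice(elem));
    // Enter the mutation/coverage loop; this loops indefinitely.
    fuzz_abi.fuzzer_main();
}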
lib/std/Build/abi.zig
@@ -138,6 +138,26 @@ pub const Rebuild = extern struct {
/// ABI bits specifically relating to the fuzzer interface.
pub const fuzz = struct {
+ pub const TestOne = *const fn (Slice) callconv(.c) void;
+ pub extern fn fuzzer_init(cache_dir_path: Slice) void;
+ pub extern fn fuzzer_coverage_id() u64;
+ pub extern fn fuzzer_init_test(test_one: TestOne, unit_test_name: Slice) void;
+ pub extern fn fuzzer_new_input(bytes: Slice) void;
+ pub extern fn fuzzer_main() void;
+
+ pub const Slice = extern struct {
+ ptr: [*]const u8,
+ len: usize,
+
+ pub fn toSlice(s: Slice) []const u8 {
+ return s.ptr[0..s.len];
+ }
+
+ pub fn fromSlice(s: []const u8) Slice {
+ return .{ .ptr = s.ptr, .len = s.len };
+ }
+ };
+
/// libfuzzer uses this and its usize is the one that counts. To match the ABI,
/// make the ints be the size of the target used with libfuzzer.
///
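The new `Slice` type is the extern-struct carrier for Zig slices across the test-runner/fuzzer boundary, replacing the test runner's private `FuzzerSlice`. For illustration only (not part of the commit), the round trip is lossless:

const std = @import("std");
const Slice = std.Build.abi.fuzz.Slice;

test "Slice round trip" {
    const original: []const u8 = "corpus entry";
    const wire: Slice = .fromSlice(original); // plain { ptr, len } pair, safe to pass across the C ABI
    try std.testing.expectEqualStrings(original, wire.toSlice());
}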
lib/std/Build/Fuzz.zig
@@ -252,9 +252,8 @@ pub fn sendUpdate(
const seen_pcs = cov_header.seenBits();
const n_runs = @atomicLoad(usize, &cov_header.n_runs, .monotonic);
const unique_runs = @atomicLoad(usize, &cov_header.unique_runs, .monotonic);
- if (prev.unique_runs != unique_runs) {
- // There has been an update.
- if (prev.unique_runs == 0) {
+ {
+ if (unique_runs != 0 and prev.unique_runs == 0) {
// We need to send initial context.
const header: abi.SourceIndexHeader = .{
.directories_len = @intCast(coverage_map.coverage.directories.entries.len),
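For context, this build-runner code and lib/fuzzer.zig below share one memory-mapped coverage file: an abi.fuzz.SeenPcsHeader followed by one seen-bit per pc (packed into usizes) and then the pc addresses, in the order pinned by the comptime asserts on SeenPcsHeader.trailing further down. A hedged sketch of tallying covered pcs from that header, assuming seenBits() exposes the trailing bit array as a slice of usize words (as its use above suggests):

const std = @import("std");
const abi = std.Build.abi.fuzz;

// Sketch only, not part of the commit: count how many pcs have ever been hit.
fn countCoveredPcs(header: *const abi.SeenPcsHeader) usize {
    var covered: usize = 0;
    for (header.seenBits()) |word| covered += @popCount(word);
    return covered;
}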
lib/fuzzer.zig
@@ -1,532 +1,1465 @@
const builtin = @import("builtin");
const std = @import("std");
-const Allocator = std.mem.Allocator;
+const mem = std.mem;
+const math = std.math;
+const Allocator = mem.Allocator;
const assert = std.debug.assert;
-const fatal = std.process.fatal;
-const SeenPcsHeader = std.Build.abi.fuzz.SeenPcsHeader;
+const panic = std.debug.panic;
+const abi = std.Build.abi.fuzz;
+const native_endian = builtin.cpu.arch.endian();
pub const std_options = std.Options{
.logFn = logOverride,
};
-var log_file_buffer: [256]u8 = undefined;
-var log_file_writer: ?std.fs.File.Writer = null;
-
fn logOverride(
comptime level: std.log.Level,
comptime scope: @Type(.enum_literal),
comptime format: []const u8,
args: anytype,
) void {
- const fw = if (log_file_writer) |*f| f else f: {
- const f = fuzzer.cache_dir.createFile("tmp/libfuzzer.log", .{}) catch
- @panic("failed to open fuzzer log file");
- log_file_writer = f.writer(&log_file_buffer);
- break :f &log_file_writer.?;
- };
+ const f = log_f orelse
+ panic("attempt to use log before initialization, message:\n" ++ format, args);
+ f.lock(.exclusive) catch |e| panic("failed to lock logging file: {t}", .{e});
+ defer f.unlock();
+
+ var buf: [256]u8 = undefined;
+ var fw = f.writer(&buf);
+ const end = f.getEndPos() catch |e| panic("failed to get fuzzer log file end: {t}", .{e});
+ fw.seekTo(end) catch |e| panic("failed to seek to fuzzer log file end: {t}", .{e});
+
const prefix1 = comptime level.asText();
const prefix2 = if (scope == .default) ": " else "(" ++ @tagName(scope) ++ "): ";
- fw.interface.print(prefix1 ++ prefix2 ++ format ++ "\n", args) catch
- @panic("failed to write to fuzzer log");
- fw.interface.flush() catch @panic("failed to flush fuzzer log");
+ fw.interface.print(
+ "[{s}] " ++ prefix1 ++ prefix2 ++ format ++ "\n",
+ .{current_test_name orelse "setup"} ++ args,
+ ) catch panic("failed to write to fuzzer log: {t}", .{fw.err.?});
+ fw.interface.flush() catch panic("failed to write to fuzzer log: {t}", .{fw.err.?});
}
-/// Helps determine run uniqueness in the face of recursion.
-export threadlocal var __sancov_lowest_stack: usize = 0;
+var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
+const gpa = switch (builtin.mode) {
+ .Debug => debug_allocator.allocator(),
+ .ReleaseFast, .ReleaseSmall, .ReleaseSafe => std.heap.smp_allocator,
+};
-export fn __sanitizer_cov_trace_const_cmp1(arg1: u8, arg2: u8) void {
- handleCmp(@returnAddress(), arg1, arg2);
-}
+/// Logically part of `exec`, but kept separate so it can be set before `exec` is.
+var log_f: ?std.fs.File = null;
+var exec: Executable = .preinit;
+var inst: Instrumentation = .preinit;
+var fuzzer: Fuzzer = undefined;
+var current_test_name: ?[]const u8 = null;
-export fn __sanitizer_cov_trace_cmp1(arg1: u8, arg2: u8) void {
- handleCmp(@returnAddress(), arg1, arg2);
+fn bitsetUsizes(elems: usize) usize {
+ return math.divCeil(usize, elems, @bitSizeOf(usize)) catch unreachable;
}
-export fn __sanitizer_cov_trace_const_cmp2(arg1: u16, arg2: u16) void {
- handleCmp(@returnAddress(), arg1, arg2);
-}
+const Executable = struct {
+ /// Tracks the hit count for each pc as updated by the process's instrumentation.
+ pc_counters: []u8,
+ /// Read-only memory section containing compiled-in constants, found by parsing the executable
+ rodata_addr: usize,
+ rodata_size: usize,
+
+ cache_f: std.fs.Dir,
+ /// Shared copy of all pcs that have been hit, stored in a memory-mapped file that can be viewed
+ /// while the fuzzer is running.
+ shared_seen_pcs: MemoryMappedList,
+ /// Hash of pcs used to uniquely identify the shared coverage file
+ pc_digest: u64,
+
+ /// A minimal state for this struct on which instrumentation can operate.
+ /// Used before this structure is initialized so that instrumentation functions
+ /// called early do not invoke illegal behavior by reading undefined values.
+ pub const preinit: Executable = .{
+ .rodata_addr = 0,
+ .rodata_size = 0,
+ .pc_counters = undefined, // instrumentation works off the __sancov_cntrs section
+ .cache_f = undefined,
+ .shared_seen_pcs = undefined,
+ .pc_digest = undefined,
+ };
-export fn __sanitizer_cov_trace_cmp2(arg1: u16, arg2: u16) void {
- handleCmp(@returnAddress(), arg1, arg2);
-}
+ /// Even on error, this initializes rodata_addr and rodata_size to valid values
+ fn initRodata(self: *Executable) !void {
+ errdefer {
+ self.rodata_addr = 0;
+ self.rodata_size = 0;
+ }
-export fn __sanitizer_cov_trace_const_cmp4(arg1: u32, arg2: u32) void {
- handleCmp(@returnAddress(), arg1, arg2);
-}
+ const exec_path = std.fs.selfExePathAlloc(gpa) catch |e|
+ if (e == error.OutOfMemory) @panic("OOM") else return e;
+ defer gpa.free(exec_path);
+ const exec_file = try std.fs.cwd().openFile(exec_path, .{});
+ defer exec_file.close();
+
+ switch (builtin.object_format) {
+ .elf => {
+ // We use two reader instances since the data they respectively read are
+ // not next to each other in the file.
+ //
+ // Multiple instances are safe since Elf.SectionHeaderIterator always calls
+ // seekTo (which we also use to arbitrarily set the index) and we always
+ // call seekTo to arbitrarily read from the string table.
+ var r_buf: [4096]u8 = undefined;
+ var r = exec_file.reader(&r_buf);
+ var str_r_buf: [4096]u8 = undefined;
+ var str_r = exec_file.reader(&str_r_buf);
+
+ const ehdr: std.elf.Header = try .read(&r.interface);
+ if (ehdr.shstrndx == 0) return error.NoElfStringTable;
+ var shdr_it = ehdr.iterateSectionHeaders(&r);
+
+ shdr_it.index = ehdr.shstrndx;
+ const str_tab_shdr = try shdr_it.next() orelse return error.InvalidElfSection;
+ const str_tab_off = str_tab_shdr.sh_offset;
+
+ shdr_it.index = 0;
+ while (try shdr_it.next()) |shdr| {
+ const flags: packed struct {
+ write: bool,
+ alloc: bool,
+ execinstr: bool,
+ } = @bitCast(@as(u3, @truncate(shdr.sh_flags)));
+ if (shdr.sh_addr == 0 or shdr.sh_size == 0 or flags != @TypeOf(flags){
+ .alloc = true,
+ .write = false,
+ .execinstr = false,
+ }) continue;
+
+ const rodata_name = ".rodata\x00";
+ try str_r.seekTo(try math.add(u64, str_tab_off, shdr.sh_name));
+ const section_name = str_r.interface.take(rodata_name.len) catch return r.err.?;
+ if (!std.mem.eql(u8, section_name, rodata_name))
+ continue;
+
+ const addr = math.cast(usize, shdr.sh_addr) orelse return error.Overflow;
+ const size = math.cast(usize, shdr.sh_size) orelse return error.Overflow;
+ _ = try math.add(usize, addr, size); // make sure there is no overflow
+ self.rodata_addr = addr;
+ self.rodata_size = size;
+ return;
+ }
+ return error.NoRodataSection;
+ },
+ else => return error.UnsupportedObjectFormat,
+ }
+ }
-export fn __sanitizer_cov_trace_cmp4(arg1: u32, arg2: u32) void {
- handleCmp(@returnAddress(), arg1, arg2);
-}
+ fn getCoverageFile(cache_dir: std.fs.Dir, pcs: []const usize, pc_digest: u64) MemoryMappedList {
+ const pc_bitset_usizes = bitsetUsizes(pcs.len);
+ const coverage_file_name = std.fmt.hex(pc_digest);
+ comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
+ comptime assert(abi.SeenPcsHeader.trailing[1] == .pc_addr);
-export fn __sanitizer_cov_trace_const_cmp8(arg1: u64, arg2: u64) void {
- handleCmp(@returnAddress(), arg1, arg2);
-}
+ var v = cache_dir.makeOpenPath("v", .{}) catch |e|
+ panic("failed to create directory 'v': {t}", .{e});
+ defer v.close();
+ const coverage_file, const populate = if (v.createFile(&coverage_file_name, .{
+ .read = true,
+ // If we create the file, we want to block other processes while we populate it
+ .lock = .exclusive,
+ .exclusive = true,
+ })) |f|
+ .{ f, true }
+ else |e| switch (e) {
+ error.PathAlreadyExists => .{ v.openFile(&coverage_file_name, .{
+ .mode = .read_write,
+ .lock = .shared,
+ }) catch |e2| panic(
+ "failed to open existing coverage file '{s}': {t}",
+ .{ &coverage_file_name, e2 },
+ ), false },
+ else => panic("failed to create coverage file '{s}': {t}", .{ &coverage_file_name, e }),
+ };
-export fn __sanitizer_cov_trace_cmp8(arg1: u64, arg2: u64) void {
- handleCmp(@returnAddress(), arg1, arg2);
-}
+ const coverage_file_len = @sizeOf(abi.SeenPcsHeader) +
+ pc_bitset_usizes * @sizeOf(usize) +
+ pcs.len * @sizeOf(usize);
+ if (populate) {
+ defer coverage_file.lock(.shared) catch |e| panic(
+ "failed to demote lock for coverage file '{s}': {t}",
+ .{ &coverage_file_name, e },
+ );
+ var map = MemoryMappedList.create(coverage_file, 0, coverage_file_len) catch |e| panic(
+ "failed to init memory map for coverage file '{s}': {t}",
+ .{ &coverage_file_name, e },
+ );
+ map.appendSliceAssumeCapacity(mem.asBytes(&abi.SeenPcsHeader{
+ .n_runs = 0,
+ .unique_runs = 0,
+ .pcs_len = pcs.len,
+ }));
+ map.appendNTimesAssumeCapacity(0, pc_bitset_usizes * @sizeOf(usize));
+ map.appendSliceAssumeCapacity(mem.sliceAsBytes(pcs));
+ return map;
+ } else {
+ const size = coverage_file.getEndPos() catch |e| panic(
+ "failed to stat coverage file '{s}': {t}",
+ .{ &coverage_file_name, e },
+ );
+ if (size != coverage_file_len) panic(
+ "incompatible existing coverage file '{s}' (differing lengths: {} != {})",
+ .{ &coverage_file_name, size, coverage_file_len },
+ );
+
+ const map = MemoryMappedList.init(
+ coverage_file,
+ coverage_file_len,
+ coverage_file_len,
+ ) catch |e| panic(
+ "failed to init memory map for coverage file '{s}': {t}",
+ .{ &coverage_file_name, e },
+ );
+
+ const seen_pcs_header: *const abi.SeenPcsHeader = @ptrCast(@volatileCast(map.items));
+ if (seen_pcs_header.pcs_len != pcs.len) panic(
+ "incompatible existing coverage file '{s}' (differing pcs length: {} != {})",
+ .{ &coverage_file_name, seen_pcs_header.pcs_len, pcs.len },
+ );
+ if (mem.indexOfDiff(usize, seen_pcs_header.pcAddrs(), pcs)) |i| panic(
+ "incompatible existing coverage file '{s}' (differing pc at index {d}: {x} != {x})",
+ .{ &coverage_file_name, i, seen_pcs_header.pcAddrs()[i], pcs[i] },
+ );
+
+ return map;
+ }
+ }
-export fn __sanitizer_cov_trace_switch(val: u64, cases_ptr: [*]u64) void {
- const pc = @returnAddress();
- const len = cases_ptr[0];
- const val_size_in_bits = cases_ptr[1];
- const cases = cases_ptr[2..][0..len];
- fuzzer.traceValue(pc ^ val);
- _ = val_size_in_bits;
- _ = cases;
- //std.log.debug("0x{x}: switch on value {d} ({d} bits) with {d} cases", .{
- // pc, val, val_size_in_bits, cases.len,
- //});
-}
+ pub fn init(cache_dir_path: []const u8) Executable {
+ var self: Executable = undefined;
-export fn __sanitizer_cov_trace_pc_indir(callee: usize) void {
- // Not valuable because we already have pc tracing via 8bit counters.
- _ = callee;
- //const pc = @returnAddress();
- //fuzzer.traceValue(pc ^ callee);
- //std.log.debug("0x{x}: indirect call to 0x{x}", .{ pc, callee });
-}
-export fn __sanitizer_cov_8bit_counters_init(start: usize, end: usize) void {
- // clang will emit a call to this function when compiling with code coverage instrumentation.
- // however fuzzer_init() does not need this information, since it directly reads from the symbol table.
- _ = start;
- _ = end;
-}
-export fn __sanitizer_cov_pcs_init(start: usize, end: usize) void {
- // clang will emit a call to this function when compiling with code coverage instrumentation.
- // however fuzzer_init() does not need this information, since it directly reads from the symbol table.
- _ = start;
- _ = end;
-}
+ const cache_dir = std.fs.cwd().makeOpenPath(cache_dir_path, .{}) catch |e| panic(
+ "failed to open directory '{s}': {t}",
+ .{ cache_dir_path, e },
+ );
+ log_f = cache_dir.createFile("tmp/libfuzzer.log", .{ .truncate = false }) catch |e|
+ panic("failed to create file 'tmp/libfuzzer.log': {t}", .{e});
+ self.cache_f = cache_dir.makeOpenPath("f", .{}) catch |e|
+ panic("failed to open directory 'f': {t}", .{e});
+
+ // Linkers are expected to automatically add symbols prefixed with these for the start and
+ // end of sections whose names are valid C identifiers.
+ const ofmt = builtin.object_format;
+ const section_start_prefix, const section_end_prefix = switch (ofmt) {
+ .elf => .{ "__start_", "__stop_" },
+ .macho => .{ "\x01section$start$__DATA$", "\x01section$end$__DATA$" },
+ else => @compileError("unsupported fuzzing object format '" ++ @tagName(ofmt) ++ "'"),
+ };
-fn handleCmp(pc: usize, arg1: u64, arg2: u64) void {
- fuzzer.traceValue(pc ^ arg1 ^ arg2);
- //std.log.debug("0x{x}: comparison of {d} and {d}", .{ pc, arg1, arg2 });
-}
+ self.pc_counters = blk: {
+ const pc_counters_start_name = section_start_prefix ++ "__sancov_cntrs";
+ const pc_counters_start = @extern([*]u8, .{
+ .name = pc_counters_start_name,
+ .linkage = .weak,
+ }) orelse panic("missing {s} symbol", .{pc_counters_start_name});
-const Fuzzer = struct {
- rng: std.Random.DefaultPrng,
- pcs: []const usize,
- pc_counters: []u8,
- n_runs: usize,
- traced_comparisons: std.AutoArrayHashMapUnmanaged(usize, void),
- /// Tracks which PCs have been seen across all runs that do not crash the fuzzer process.
- /// Stored in a memory-mapped file so that it can be shared with other
- /// processes and viewed while the fuzzer is running.
- seen_pcs: MemoryMappedList,
- cache_dir: std.fs.Dir,
- /// Identifies the file name that will be used to store coverage
- /// information, available to other processes.
- coverage_id: u64,
- unit_test_name: []const u8,
-
- /// The index corresponds to the file name within the f/ subdirectory.
- /// The string is the input.
- /// This data is read-only; it caches what is on the filesystem.
- corpus: std.ArrayListUnmanaged(Input),
- corpus_directory: std.Build.Cache.Directory,
+ const pc_counters_end_name = section_end_prefix ++ "__sancov_cntrs";
+ const pc_counters_end = @extern([*]u8, .{
+ .name = pc_counters_end_name,
+ .linkage = .weak,
+ }) orelse panic("missing {s} symbol", .{pc_counters_end_name});
- /// The next input that will be given to the testOne function. When the
- /// current process crashes, this memory-mapped file is used to recover the
- /// input.
- ///
- /// The file size corresponds to the capacity. The length is not stored
- /// and that is the next thing to work on!
- input: MemoryMappedList,
+ break :blk pc_counters_start[0 .. pc_counters_end - pc_counters_start];
+ };
- const Input = struct {
- bytes: []u8,
- last_traced_comparison: usize,
- };
+ const pcs = blk: {
+ const pcs_start_name = section_start_prefix ++ "__sancov_pcs1";
+ const pcs_start = @extern([*]usize, .{
+ .name = pcs_start_name,
+ .linkage = .weak,
+ }) orelse panic("missing {s} symbol", .{pcs_start_name});
- const Slice = extern struct {
- ptr: [*]const u8,
- len: usize,
+ const pcs_end_name = section_end_prefix ++ "__sancov_pcs1";
+ const pcs_end = @extern([*]usize, .{
+ .name = pcs_end_name,
+ .linkage = .weak,
+ }) orelse panic("missing {s} symbol", .{pcs_end_name});
- fn toZig(s: Slice) []const u8 {
- return s.ptr[0..s.len];
- }
+ break :blk pcs_start[0 .. pcs_end - pcs_start];
+ };
- fn fromZig(s: []const u8) Slice {
- return .{
- .ptr = s.ptr,
- .len = s.len,
- };
+ if (self.pc_counters.len != pcs.len) panic(
+ "pc counters length and pcs length do not match ({} != {})",
+ .{ self.pc_counters.len, pcs.len },
+ );
+
+ self.initRodata() catch |e| if (e != error.UnsupportedObjectFormat) std.log.err(
+ \\failed to enumerate read-only memory: {t}
+ \\efficiency will be severely reduced
+ , .{e});
+
+ self.pc_digest = std.hash.Wyhash.hash(0, mem.sliceAsBytes(pcs));
+ self.shared_seen_pcs = getCoverageFile(cache_dir, pcs, self.pc_digest);
+
+ return self;
+ }
+
+ pub fn pcBitsetIterator(self: Executable) PcBitsetIterator {
+ return .{ .pc_counters = self.pc_counters };
+ }
+
+ /// Iterates over pc_counters, yielding a bitset of which counters have been hit
+ pub const PcBitsetIterator = struct {
+ index: usize = 0,
+ pc_counters: []u8,
+
+ pub fn next(self: *PcBitsetIterator) usize {
+ const rest = self.pc_counters[self.index..];
+ if (rest.len >= @bitSizeOf(usize)) {
+ defer self.index += @bitSizeOf(usize);
+ const V = @Vector(@bitSizeOf(usize), u8);
+ return @as(usize, @bitCast(@as(V, @splat(0)) != rest[0..@bitSizeOf(usize)].*));
+ } else if (rest.len != 0) {
+ defer self.index += rest.len;
+ var res: usize = 0;
+ for (0.., rest) |bit_index, byte| {
+ res |= @shlExact(@as(usize, @intFromBool(byte != 0)), @intCast(bit_index));
+ }
+ return res;
+ } else unreachable;
}
};
+};
- fn init(f: *Fuzzer, cache_dir: std.fs.Dir, pc_counters: []u8, pcs: []const usize) !void {
- f.cache_dir = cache_dir;
- f.pc_counters = pc_counters;
- f.pcs = pcs;
-
- // Choose a file name for the coverage based on a hash of the PCs that will be stored within.
- const pc_digest = std.hash.Wyhash.hash(0, std.mem.sliceAsBytes(pcs));
- f.coverage_id = pc_digest;
- const hex_digest = std.fmt.hex(pc_digest);
- const coverage_file_path = "v/" ++ hex_digest;
-
- // Layout of this file:
- // - Header
- // - list of PC addresses (usize elements)
- // - list of hit flag, 1 bit per address (stored in u8 elements)
- const coverage_file = createFileBail(cache_dir, coverage_file_path, .{
- .read = true,
- .truncate = false,
- });
- const n_bitset_elems = (pcs.len + @bitSizeOf(usize) - 1) / @bitSizeOf(usize);
- comptime assert(SeenPcsHeader.trailing[0] == .pc_bits_usize);
- comptime assert(SeenPcsHeader.trailing[1] == .pc_addr);
- const bytes_len = @sizeOf(SeenPcsHeader) +
- n_bitset_elems * @sizeOf(usize) +
- pcs.len * @sizeOf(usize);
- const existing_len = coverage_file.getEndPos() catch |err| {
- fatal("unable to check len of coverage file: {s}", .{@errorName(err)});
+/// Data gathered from instrumentation functions.
+/// Separate from Executable since its state is resettable and changes.
+/// Separate from Fuzzer since it may be needed before fuzzing starts.
+const Instrumentation = struct {
+ /// Bitset of seen pcs across all runs excluding fresh pcs.
+ /// This is separate from shared_seen_pcs because multiple fuzzing processes are likely using
+ /// it, which causes contention and sets pcs unrelated to our campaign.
+ seen_pcs: []usize,
+ /// Bitset of seen rodata bytes read across all runs
+ seen_rodata_loads: []usize,
+
+ /// Bitset of the run's read bytes that weren't present in seen_rodata_loads
+ /// Elements are always zero if !any_new_rodata_loads
+ new_rodata_loads: []usize,
+ any_new_rodata_loads: bool,
+
+ /// Stores a fresh input's new pcs
+ fresh_pcs: []usize,
+ /// Stores a fresh input's new reads
+ /// Elements are always zero if !any_fresh_rodata_loads
+ fresh_rodata_loads: []usize,
+ any_fresh_rodata_loads: bool,
+
+ /// Pcs from which __sanitizer_cov_trace_switch and __sanitizer_cov_trace_const_cmpX
+ /// have been called and whose constant operands have already been added to const_valsX
+ const_pcs: std.AutoArrayHashMapUnmanaged(usize, void) = .empty,
+ /// Values that have been constant operands in comparisons, switch cases, or memory reads
+ /// There may be duplicates in this array if they came from different addresses, which is
+ /// fine as they are likely more important and hence more likely to be selected.
+ const_vals2: std.ArrayListUnmanaged(u16) = .empty,
+ const_vals4: std.ArrayListUnmanaged(u32) = .empty,
+ const_vals8: std.ArrayListUnmanaged(u64) = .empty,
+ const_vals16: std.ArrayListUnmanaged(u128) = .empty,
+
+ /// A minimal state for this struct on which instrumentation can operate.
+ /// Used before this structure is initialized so that instrumentation functions
+ /// called early do not invoke illegal behavior by reading undefined values.
+ pub const preinit: Instrumentation = .{
+ .seen_pcs = undefined, // currently only updated by `Fuzzer`
+ .seen_rodata_loads = undefined,
+ .new_rodata_loads = undefined,
+ .any_new_rodata_loads = undefined,
+ .fresh_pcs = undefined,
+ .fresh_rodata_loads = undefined,
+ .any_fresh_rodata_loads = undefined,
+ };
+
+ pub fn depreinit(self: *Instrumentation) void {
+ self.const_vals2.deinit(gpa);
+ self.const_vals4.deinit(gpa);
+ self.const_vals8.deinit(gpa);
+ self.const_vals16.deinit(gpa);
+ self.* = undefined;
+ }
+
+ pub fn init() Instrumentation {
+ const pc_bitset_usizes = bitsetUsizes(exec.pc_counters.len);
+ const rodata_bitset_usizes = bitsetUsizes(exec.rodata_size);
+ const alloc_usizes = pc_bitset_usizes * 2 + rodata_bitset_usizes * 3;
+ const buf = gpa.alloc(u8, alloc_usizes * @sizeOf(usize)) catch @panic("OOM");
+ var fba_ctx: std.heap.FixedBufferAllocator = .init(buf);
+ const fba = fba_ctx.allocator();
+
+ var self: Instrumentation = .{
+ .seen_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable,
+ .seen_rodata_loads = fba.alloc(usize, rodata_bitset_usizes) catch unreachable,
+ .new_rodata_loads = fba.alloc(usize, rodata_bitset_usizes) catch unreachable,
+ .any_new_rodata_loads = undefined,
+ .fresh_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable,
+ .fresh_rodata_loads = fba.alloc(usize, rodata_bitset_usizes) catch unreachable,
+ .any_fresh_rodata_loads = undefined,
};
- if (existing_len == 0) {
- coverage_file.setEndPos(bytes_len) catch |err| {
- fatal("unable to set len of coverage file: {s}", .{@errorName(err)});
- };
- } else if (existing_len != bytes_len) {
- fatal("incompatible existing coverage file (differing lengths)", .{});
+ self.reset();
+ return self;
+ }
+
+ pub fn reset(self: *Instrumentation) void {
+ @memset(self.seen_pcs, 0);
+ @memset(self.seen_rodata_loads, 0);
+ @memset(self.new_rodata_loads, 0);
+ self.any_new_rodata_loads = false;
+ @memset(self.fresh_pcs, 0);
+ @memset(self.fresh_rodata_loads, 0);
+ self.any_fresh_rodata_loads = false;
+ self.const_pcs.clearRetainingCapacity();
+ self.const_vals2.clearRetainingCapacity();
+ self.const_vals4.clearRetainingCapacity();
+ self.const_vals8.clearRetainingCapacity();
+ self.const_vals16.clearRetainingCapacity();
+ }
+
+ /// Returns whether the pc was already seen; if it was not, it is marked as seen
+ pub fn constPcSeen(self: *Instrumentation, pc: usize) bool {
+ return (self.const_pcs.getOrPut(gpa, pc) catch @panic("OOM")).found_existing;
+ }
+
+ pub fn clearNewRodataLoads(self: *Instrumentation) void {
+ if (self.any_new_rodata_loads) {
+ @memset(self.new_rodata_loads, 0);
+ self.any_new_rodata_loads = false;
}
- f.seen_pcs = MemoryMappedList.init(coverage_file, existing_len, bytes_len) catch |err| {
- fatal("unable to init coverage memory map: {s}", .{@errorName(err)});
- };
- if (existing_len != 0) {
- const existing_pcs_bytes = f.seen_pcs.items[@sizeOf(SeenPcsHeader) + @sizeOf(usize) * n_bitset_elems ..][0 .. pcs.len * @sizeOf(usize)];
- const existing_pcs = std.mem.bytesAsSlice(usize, existing_pcs_bytes);
- for (existing_pcs, pcs, 0..) |old, new, i| {
- if (old != new) {
- fatal("incompatible existing coverage file (differing PC at index {d}: {x} != {x})", .{
- i, old, new,
- });
- }
- }
- } else {
- const header: SeenPcsHeader = .{
- .n_runs = 0,
- .unique_runs = 0,
- .pcs_len = pcs.len,
- };
- f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&header));
- f.seen_pcs.appendNTimesAssumeCapacity(0, n_bitset_elems * @sizeOf(usize));
- f.seen_pcs.appendSliceAssumeCapacity(std.mem.sliceAsBytes(pcs));
+ }
+
+ pub fn isFresh(self: *Instrumentation) bool {
+ if (self.any_new_rodata_loads) return true;
+
+ var hit_pcs = exec.pcBitsetIterator();
+ for (self.seen_pcs) |seen_pcs| {
+ if (hit_pcs.next() & ~seen_pcs != 0) return true;
}
+
+ return false;
}
- fn initNextInput(f: *Fuzzer) void {
- while (true) {
- const i = f.corpus.items.len;
- var buf: [30]u8 = undefined;
- const input_sub_path = std.fmt.bufPrint(&buf, "{d}", .{i}) catch unreachable;
- const input = f.corpus_directory.handle.readFileAlloc(input_sub_path, gpa, .limited(1 << 31)) catch |err| switch (err) {
- error.FileNotFound => {
- // Make this one the next input.
- const input_file = f.corpus_directory.handle.createFile(input_sub_path, .{
- .exclusive = true,
- .truncate = false,
- .read = true,
- }) catch |e| switch (e) {
- error.PathAlreadyExists => continue,
- else => fatal("unable to create '{f}{d}: {s}", .{ f.corpus_directory, i, @errorName(err) }),
- };
- errdefer input_file.close();
- // Initialize the mmap for the current input.
- f.input = MemoryMappedList.create(input_file, 0, std.heap.page_size_max) catch |e| {
- fatal("unable to init memory map for input at '{f}{d}': {s}", .{
- f.corpus_directory, i, @errorName(e),
- });
- };
- break;
- },
- else => fatal("unable to read '{f}{d}': {s}", .{ f.corpus_directory, i, @errorName(err) }),
- };
- errdefer gpa.free(input);
- f.corpus.append(gpa, .{
- .bytes = input,
- .last_traced_comparison = 0,
- }) catch |err| oom(err);
+ /// Updates fresh_pcs and fresh_rodata_loads
+ /// any_new_rodata_loads and elements of new_rodata_loads are unspecified
+ /// afterwards, but still valid.
+ pub fn setFresh(self: *Instrumentation) void {
+ var hit_pcs = exec.pcBitsetIterator();
+ for (self.seen_pcs, self.fresh_pcs) |seen_pcs, *fresh_pcs| {
+ fresh_pcs.* = hit_pcs.next() & ~seen_pcs;
}
+
+ mem.swap([]usize, &self.fresh_rodata_loads, &self.new_rodata_loads);
+ mem.swap(bool, &self.any_fresh_rodata_loads, &self.any_new_rodata_loads);
}
- fn addCorpusElem(f: *Fuzzer, input: []const u8) !void {
- try f.corpus.append(gpa, .{
- .bytes = try gpa.dupe(u8, input),
- .last_traced_comparison = 0,
- });
+ /// Returns whether exec.pc_counters and new_rodata_loads are equal to or supersets of fresh_pcs
+ /// and fresh_rodata_loads, respectively.
+ pub fn atleastFresh(self: *Instrumentation) bool {
+ var hit_pcs = exec.pcBitsetIterator();
+ for (self.fresh_pcs) |fresh_pcs| {
+ if (fresh_pcs & hit_pcs.next() != fresh_pcs) return false;
+ }
+
+ if (self.any_fresh_rodata_loads) {
+ if (!self.any_new_rodata_loads) return false;
+ for (self.new_rodata_loads, self.fresh_rodata_loads) |n, f| {
+ if (n & f != f) return false;
+ }
+ }
+
+ return true;
}
- fn start(f: *Fuzzer) !void {
- const rng = fuzzer.rng.random();
+ /// Updates the seen sets based on fresh_pcs and fresh_rodata_loads
+ fn updateSeen(self: *Instrumentation) void {
+ comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
+ const shared_seen_pcs: [*]volatile usize = @ptrCast(
+ exec.shared_seen_pcs.items[@sizeOf(abi.SeenPcsHeader)..].ptr,
+ );
- // Grab the corpus which is namespaced based on `unit_test_name`.
- {
- if (f.unit_test_name.len == 0) fatal("test runner never set unit test name", .{});
- const sub_path = try std.fmt.allocPrint(gpa, "f/{s}", .{f.unit_test_name});
- f.corpus_directory = .{
- .handle = f.cache_dir.makeOpenPath(sub_path, .{}) catch |err|
- fatal("unable to open corpus directory 'f/{s}': {t}", .{ sub_path, err }),
- .path = sub_path,
- };
- initNextInput(f);
+ for (self.seen_pcs, shared_seen_pcs, self.fresh_pcs) |*seen, *shared_seen, fresh| {
+ seen.* |= fresh;
+ if (fresh != 0)
+ _ = @atomicRmw(usize, shared_seen, .Or, fresh, .monotonic);
}
- assert(f.n_runs == 0);
-
- // If the corpus is empty, synthesize one input.
- if (f.corpus.items.len == 0) {
- const len = rng.uintLessThanBiased(usize, 200);
- const slice = try gpa.alloc(u8, len);
- rng.bytes(slice);
- f.input.appendSliceAssumeCapacity(slice);
- try f.corpus.append(gpa, .{
- .bytes = slice,
- .last_traced_comparison = 0,
- });
- runOne(f, 0);
+ if (self.any_fresh_rodata_loads) {
+ for (self.seen_rodata_loads, self.fresh_rodata_loads) |*s, f|
+ s.* |= f;
}
+ }
+};
+
+const Fuzzer = struct {
+ arena_ctx: std.heap.ArenaAllocator = .init(gpa),
+ rng: std.Random.DefaultPrng = .init(0),
+ test_one: abi.TestOne,
+ /// The next input that will be given to the testOne function. When the
+ /// current process crashes, this memory-mapped file is used to recover the
+ /// input.
+ input: MemoryMappedList,
+
+ /// Minimized past inputs leading to new pcs or rodata reads. These are selected in round-robin
+ /// fashion and randomly mutated.
+ /// Element zero is always an empty input. It is guaranteed that no other elements are empty.
+ corpus: std.ArrayListUnmanaged([]const u8),
+ corpus_pos: usize,
+ /// List of past mutations that have led to new inputs. This way, the mutations that are the
+ /// most effective are the most likely to be selected again. Starts with one of each mutation.
+ mutations: std.ArrayListUnmanaged(Mutation) = .empty,
+
+ /// Filesystem directory containing found inputs for future runs
+ corpus_dir: std.fs.Dir,
+ corpus_dir_idx: usize = 0,
+
+ pub fn init(test_one: abi.TestOne, unit_test_name: []const u8) Fuzzer {
+ var self: Fuzzer = .{
+ .test_one = test_one,
+ .input = undefined,
+ .corpus = .empty,
+ .corpus_pos = 0,
+ .mutations = .empty,
+ .corpus_dir = undefined,
+ };
+ const arena = self.arena_ctx.allocator();
+
+ self.corpus_dir = exec.cache_f.makeOpenPath(unit_test_name, .{}) catch |e|
+ panic("failed to open directory '{s}': {t}", .{ unit_test_name, e });
+ self.input = in: {
+ const f = self.corpus_dir.createFile("in", .{
+ .read = true,
+ .truncate = false,
+ // In case any other fuzz tests are running under the same test name,
+ // the input file is exclusively locked to ensure only one proceeds.
+ .lock = .exclusive,
+ .lock_nonblocking = true,
+ }) catch |e| switch (e) {
+ error.WouldBlock => @panic("input file 'in' is in use by another fuzzing process"),
+ else => panic("failed to create input file 'in': {t}", .{e}),
+ };
+ const size = f.getEndPos() catch |e| panic("failed to stat input file 'in': {t}", .{e});
+ const map = (if (size < std.heap.page_size_max)
+ MemoryMappedList.create(f, 8, std.heap.page_size_max)
+ else
+ MemoryMappedList.init(f, size, size)) catch |e|
+ panic("failed to memory map input file 'in': {t}", .{e});
+
+ // Perform a dry-run of the stored input if there was one in case it might reproduce a
+ // crash.
+ const old_in_len = mem.littleToNative(usize, mem.bytesAsValue(usize, map.items[0..8]).*);
+ if (size >= 8 and old_in_len != 0 and map.items.len - 8 < old_in_len) {
+ test_one(.fromSlice(@volatileCast(map.items[8..][0..old_in_len])));
+ }
+
+ break :in map;
+ };
+ inst.reset();
+
+ self.mutations.appendSlice(gpa, std.meta.tags(Mutation)) catch @panic("OOM");
+ // Ensure there is never an empty corpus. Additionally, an empty input usually leads to
+ // new inputs.
+ self.addInput(&.{});
while (true) {
- const chosen_index = rng.uintLessThanBiased(usize, f.corpus.items.len);
- const modification = rng.enumValue(Mutation);
- f.mutateAndRunOne(chosen_index, modification);
+ var name_buf: [@sizeOf(usize) * 2]u8 = undefined;
+ const bytes = self.corpus_dir.readFileAlloc(
+ std.fmt.bufPrint(&name_buf, "{x}", .{self.corpus_dir_idx}) catch unreachable,
+ arena,
+ .unlimited,
+ ) catch |e| switch (e) {
+ error.FileNotFound => break,
+ else => panic("failed to read corpus file '{x}': {t}", .{ self.corpus_dir_idx, e }),
+ };
+ // No corpus file of length zero will ever be created
+ if (bytes.len == 0)
+ panic("corrupt corpus file '{x}' (len of zero)", .{self.corpus_dir_idx});
+ self.addInput(bytes);
+ self.corpus_dir_idx += 1;
}
+
+ return self;
}
- /// `x` represents a possible branch. It is the PC address of the possible
- /// branch site, hashed together with the value(s) used that determine to
- /// where it branches.
- fn traceValue(f: *Fuzzer, x: usize) void {
- errdefer |err| oom(err);
- try f.traced_comparisons.put(gpa, x, {});
+ pub fn deinit(self: *Fuzzer) void {
+ self.input.deinit();
+ self.corpus.deinit(gpa);
+ self.mutations.deinit(gpa);
+ self.corpus_dir.close();
+ self.arena_ctx.deinit();
+ self.* = undefined;
}
- const Mutation = enum {
- remove_byte,
- modify_byte,
- add_byte,
- };
+ pub fn addInput(self: *Fuzzer, bytes: []const u8) void {
+ self.corpus.append(gpa, bytes) catch @panic("OOM");
+ self.input.clearRetainingCapacity();
+ self.input.ensureTotalCapacity(8 + bytes.len) catch |e|
+ panic("could not resize shared input file: {t}", .{e});
+ self.input.items.len = 8;
+ self.input.appendSliceAssumeCapacity(bytes);
+ self.run();
+ inst.setFresh();
+ inst.updateSeen();
+ inst.clearNewRodataLoads();
+ }
- fn mutateAndRunOne(f: *Fuzzer, corpus_index: usize, mutation: Mutation) void {
- const rng = fuzzer.rng.random();
- f.input.clearRetainingCapacity();
- const old_input = f.corpus.items[corpus_index].bytes;
- f.input.ensureTotalCapacity(old_input.len + 1) catch @panic("mmap file resize failed");
- switch (mutation) {
- .remove_byte => {
- const omitted_index = rng.uintLessThanBiased(usize, old_input.len);
- f.input.appendSliceAssumeCapacity(old_input[0..omitted_index]);
- f.input.appendSliceAssumeCapacity(old_input[omitted_index + 1 ..]);
- },
- .modify_byte => {
- const modified_index = rng.uintLessThanBiased(usize, old_input.len);
- f.input.appendSliceAssumeCapacity(old_input);
- f.input.items[modified_index] = rng.int(u8);
- },
- .add_byte => {
- const modified_index = rng.uintLessThanBiased(usize, old_input.len);
- f.input.appendSliceAssumeCapacity(old_input[0..modified_index]);
- f.input.appendAssumeCapacity(rng.int(u8));
- f.input.appendSliceAssumeCapacity(old_input[modified_index..]);
- },
+ /// Assumes fresh_pcs and fresh_rodata_loads correspond to the input
+ fn minimizeInput(self: *Fuzzer) void {
+ // The minimization technique is kept relatively simple: we sequentially try to remove each
+ // byte and check that the new pcs and memory loads are still hit.
+ var i = self.input.items.len;
+ while (i != 8) {
+ i -= 1;
+ const old = self.input.orderedRemove(i);
+
+ @memset(exec.pc_counters, 0);
+ inst.clearNewRodataLoads();
+ self.run();
+
+ if (!inst.atleastFresh()) {
+ self.input.insertAssumeCapacity(i, old);
+ } else {
+ // This removal may have led to new pcs or memory loads being hit, so we need to
+ // update them to avoid duplicates.
+ inst.setFresh();
+ }
}
- runOne(f, corpus_index);
}
- fn runOne(f: *Fuzzer, corpus_index: usize) void {
- const header: *volatile SeenPcsHeader = @ptrCast(f.seen_pcs.items[0..@sizeOf(SeenPcsHeader)]);
-
- f.traced_comparisons.clearRetainingCapacity();
- @memset(f.pc_counters, 0);
- __sancov_lowest_stack = std.math.maxInt(usize);
+ fn run(self: *Fuzzer) void {
+ // We don't need to clear pc_counters here; all we care about is new hits and not already
+ // seen hits. Ideally, we wouldn't even have these counters and would do something similar to
+ // what we do for tracking memory (i.e. a __sanitizer_cov function that updates a flag on a
+ // new hit).
+ assert(!inst.any_new_rodata_loads);
- fuzzer_one(@volatileCast(f.input.items.ptr), f.input.items.len);
+ mem.bytesAsValue(usize, self.input.items[0..8]).* =
+ mem.nativeToLittle(usize, self.input.items.len - 8);
+ self.test_one(.fromSlice(@volatileCast(self.input.items[8..])));
- f.n_runs += 1;
+ const header = mem.bytesAsValue(
+ abi.SeenPcsHeader,
+ exec.shared_seen_pcs.items[0..@sizeOf(abi.SeenPcsHeader)],
+ );
_ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic);
+ }
- // Track code coverage from all runs.
- comptime assert(SeenPcsHeader.trailing[0] == .pc_bits_usize);
- const header_end_ptr: [*]volatile usize = @ptrCast(f.seen_pcs.items[@sizeOf(SeenPcsHeader)..]);
- const remainder = f.pcs.len % @bitSizeOf(usize);
- const aligned_len = f.pcs.len - remainder;
- const seen_pcs = header_end_ptr[0..aligned_len];
- const pc_counters = std.mem.bytesAsSlice([@bitSizeOf(usize)]u8, f.pc_counters[0..aligned_len]);
- const V = @Vector(@bitSizeOf(usize), u8);
- const zero_v: V = @splat(0);
- var fresh = false;
- var superset = true;
-
- for (header_end_ptr[0..pc_counters.len], pc_counters) |*elem, *array| {
- const v: V = array.*;
- const mask: usize = @bitCast(v != zero_v);
- const prev = @atomicRmw(usize, elem, .Or, mask, .monotonic);
- fresh = fresh or (prev | mask) != prev;
- superset = superset and (prev | mask) != mask;
- }
- if (remainder > 0) {
- const i = pc_counters.len;
- const elem = &seen_pcs[i];
- var mask: usize = 0;
- for (f.pc_counters[i * @bitSizeOf(usize) ..][0..remainder], 0..) |byte, bit_index| {
- mask |= @as(usize, @intFromBool(byte != 0)) << @intCast(bit_index);
+ pub fn cycle(self: *Fuzzer) void {
+ const input = self.corpus.items[self.corpus_pos];
+ self.corpus_pos += 1;
+ if (self.corpus_pos == self.corpus.items.len)
+ self.corpus_pos = 0;
+
+ const rng = self.rng.random();
+ while (true) {
+ const m = self.mutations.items[rng.uintLessThanBiased(usize, self.mutations.items.len)];
+ if (!m.mutate(
+ rng,
+ input,
+ &self.input,
+ self.corpus.items,
+ inst.const_vals2.items,
+ inst.const_vals4.items,
+ inst.const_vals8.items,
+ inst.const_vals16.items,
+ )) continue;
+
+ self.run();
+ if (inst.isFresh()) {
+ @branchHint(.unlikely);
+
+ const header = mem.bytesAsValue(
+ abi.SeenPcsHeader,
+ exec.shared_seen_pcs.items[0..@sizeOf(abi.SeenPcsHeader)],
+ );
+ _ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic);
+
+ inst.setFresh();
+ self.minimizeInput();
+ inst.updateSeen();
+ inst.clearNewRodataLoads();
+
+ // An empty input has always been tried, so if an empty input is fresh then the
+ // test has to be non-deterministic. This has to be checked as duplicate empty
+ // entries are not allowed.
+ if (self.input.items.len - 8 == 0) {
+ std.log.warn("non-deterministic test (empty input produces different hits)", .{});
+ _ = @atomicRmw(usize, &header.unique_runs, .Sub, 1, .monotonic);
+ return;
+ }
+
+ const arena = self.arena_ctx.allocator();
+ const bytes = arena.dupe(u8, @volatileCast(self.input.items[8..])) catch @panic("OOM");
+
+ self.corpus.append(gpa, bytes) catch @panic("OOM");
+ self.mutations.appendNTimes(gpa, m, 6) catch @panic("OOM");
+
+ // Write new corpus to cache
+ var name_buf: [@sizeOf(usize) * 2]u8 = undefined;
+ self.corpus_dir.writeFile(.{
+ .sub_path = std.fmt.bufPrint(
+ &name_buf,
+ "{x}",
+ .{self.corpus_dir_idx},
+ ) catch unreachable,
+ .data = bytes,
+ }) catch |e| panic(
+ "failed to write corpus file '{x}': {t}",
+ .{ self.corpus_dir_idx, e },
+ );
+ self.corpus_dir_idx += 1;
}
- const prev = @atomicRmw(usize, elem, .Or, mask, .monotonic);
- fresh = fresh or (prev | mask) != prev;
- superset = superset and (prev | mask) != mask;
- }
- // First check if this is a better version of an already existing
- // input, replacing that input.
- if (superset or f.traced_comparisons.entries.len >= f.corpus.items[corpus_index].last_traced_comparison) {
- const new_input = gpa.realloc(f.corpus.items[corpus_index].bytes, f.input.items.len) catch |err| oom(err);
- f.corpus.items[corpus_index] = .{
- .bytes = new_input,
- .last_traced_comparison = f.traced_comparisons.count(),
- };
- @memcpy(new_input, @volatileCast(f.input.items));
- _ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic);
- return;
+ break;
}
+ }
+};
- if (!fresh) return;
+/// Instrumentation must not be triggered before this function is called
+export fn fuzzer_init(cache_dir_path: abi.Slice) void {
+ inst.depreinit();
+ exec = .init(cache_dir_path.toSlice());
+ inst = .init();
+}
- // Input is already committed to the file system, we just need to open a new file
- // for the next input.
- // Pre-add it to the corpus list so that it does not get redundantly picked up.
- f.corpus.append(gpa, .{
- .bytes = gpa.dupe(u8, @volatileCast(f.input.items)) catch |err| oom(err),
- .last_traced_comparison = f.traced_comparisons.entries.len,
- }) catch |err| oom(err);
- f.input.deinit();
- initNextInput(f);
+/// Invalid until `fuzzer_init` is called.
+export fn fuzzer_coverage_id() u64 {
+ return exec.pc_digest;
+}
- // TODO: also mark input as "hot" so it gets prioritized for checking mutations above others.
+/// fuzzer_init must be called beforehand
+export fn fuzzer_init_test(test_one: abi.TestOne, unit_test_name: abi.Slice) void {
+ current_test_name = unit_test_name.toSlice();
+ fuzzer = .init(test_one, unit_test_name.toSlice());
+}
- _ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic);
+/// fuzzer_init_test must be called beforehand
+/// The memory behind bytes is borrowed by the fuzzer; the caller must not free it until the fuzzer is finished.
+export fn fuzzer_new_input(bytes: abi.Slice) void {
+ // An entry of length zero is always added and duplicates of it are not allowed.
+ if (bytes.len != 0)
+ fuzzer.addInput(bytes.toSlice());
+}
+
+/// fuzzer_init_test must be called first
+export fn fuzzer_main() void {
+ while (true) {
+ fuzzer.cycle();
}
-};
+}
-fn createFileBail(dir: std.fs.Dir, sub_path: []const u8, flags: std.fs.File.CreateFlags) std.fs.File {
- return dir.createFile(sub_path, flags) catch |err| switch (err) {
- error.FileNotFound => {
- const dir_name = std.fs.path.dirname(sub_path).?;
- dir.makePath(dir_name) catch |e| {
- fatal("unable to make path '{s}': {s}", .{ dir_name, @errorName(e) });
- };
- return dir.createFile(sub_path, flags) catch |e| {
- fatal("unable to create file '{s}': {s}", .{ sub_path, @errorName(e) });
- };
- },
- else => fatal("unable to create file '{s}': {s}", .{ sub_path, @errorName(err) }),
- };
+/// Helps determine run uniqueness in the face of recursion.
+/// Currently not used by the fuzzer.
+export threadlocal var __sancov_lowest_stack: usize = 0;
+
+/// Inline so that @returnAddress() refers to the instrumented call site rather than this helper
+inline fn genericConstCmp(T: anytype, val: T, comptime const_vals_field: []const u8) void {
+ if (!inst.constPcSeen(@returnAddress())) {
+ @branchHint(.unlikely);
+ @field(inst, const_vals_field).append(gpa, val) catch @panic("OOM");
+ }
+}
+
+export fn __sanitizer_cov_trace_const_cmp1(const_arg: u8, arg: u8) void {
+ _ = const_arg;
+ _ = arg;
+}
+
+export fn __sanitizer_cov_trace_const_cmp2(const_arg: u16, arg: u16) void {
+ _ = arg;
+ genericConstCmp(u16, const_arg, "const_vals2");
+}
+
+export fn __sanitizer_cov_trace_const_cmp4(const_arg: u32, arg: u32) void {
+ _ = arg;
+ genericConstCmp(u32, const_arg, "const_vals4");
+}
+
+export fn __sanitizer_cov_trace_const_cmp8(const_arg: u64, arg: u64) void {
+ _ = arg;
+ genericConstCmp(u64, const_arg, "const_vals8");
}
-fn oom(err: anytype) noreturn {
- switch (err) {
- error.OutOfMemory => @panic("out of memory"),
+export fn __sanitizer_cov_trace_switch(val: u64, cases: [*]const u64) void {
+ _ = val;
+ if (!inst.constPcSeen(@returnAddress())) {
+ @branchHint(.unlikely);
+ const case_bits = cases[1];
+ const cases_slice = cases[2..][0..cases[0]];
+ switch (case_bits) {
+ // 8-bit cases are ignored because random mutation is likely to produce them anyway
+ 0...8 => {},
+ 9...16 => for (cases_slice) |c|
+ inst.const_vals2.append(gpa, @truncate(c)) catch @panic("OOM"),
+ 17...32 => for (cases_slice) |c|
+ inst.const_vals4.append(gpa, @truncate(c)) catch @panic("OOM"),
+ 33...64 => for (cases_slice) |c|
+ inst.const_vals8.append(gpa, @truncate(c)) catch @panic("OOM"),
+ else => {}, // Should be impossible
+ }
}
}
-var debug_allocator: std.heap.GeneralPurposeAllocator(.{}) = .init;
+fn genericLoad(T: anytype, ptr: *align(1) const T, comptime opt_const_vals_field: ?[]const u8) void {
+ const addr = @intFromPtr(ptr);
+ const off = addr -% exec.rodata_addr;
+ if (off >= exec.rodata_size) {
+ @branchHint(.likely);
+ return;
+ }
-const gpa = switch (builtin.mode) {
- .Debug => debug_allocator.allocator(),
- .ReleaseFast, .ReleaseSmall, .ReleaseSafe => std.heap.smp_allocator,
-};
+ const i = off / @bitSizeOf(usize);
+ // Bits are intentionally truncated since the pointer will almost always be aligned
+ const hit = (@as(usize, (1 << @sizeOf(T)) - 1)) << @intCast(off % @bitSizeOf(usize));
+ const new = hit & ~inst.seen_rodata_loads[i];
+ if (new == 0) {
+ @branchHint(.likely);
+ return;
+ }
-var fuzzer: Fuzzer = .{
- .rng = std.Random.DefaultPrng.init(0),
- .input = undefined,
- .pcs = undefined,
- .pc_counters = undefined,
- .n_runs = 0,
- .cache_dir = undefined,
- .seen_pcs = undefined,
- .coverage_id = undefined,
- .unit_test_name = &.{},
- .corpus = .empty,
- .corpus_directory = undefined,
- .traced_comparisons = .empty,
-};
+ inst.new_rodata_loads[i] |= new;
+ inst.any_new_rodata_loads = true;
-/// Invalid until `fuzzer_init` is called.
-export fn fuzzer_coverage_id() u64 {
- return fuzzer.coverage_id;
+ if (opt_const_vals_field) |const_vals_field| {
+ // This may have already been hit and this run is just being used for evaluating the
+ // input, in which case we do not want to re-add the same value.
+ if (inst.any_fresh_rodata_loads) {
+ @branchHint(.unlikely);
+ if (new & ~inst.fresh_rodata_loads[i] == 0)
+ return;
+ }
+ @field(inst, const_vals_field).append(gpa, ptr.*) catch @panic("OOM");
+ }
}
-var fuzzer_one: *const fn (input_ptr: [*]const u8, input_len: usize) callconv(.c) void = undefined;
+export fn __sanitizer_cov_load1(ptr: *align(1) const u8) void {
+ genericLoad(u8, ptr, null);
+}
-export fn fuzzer_start(testOne: @TypeOf(fuzzer_one)) void {
- fuzzer_one = testOne;
- fuzzer.start() catch |err| oom(err);
+export fn __sanitizer_cov_load2(ptr: *align(1) const u16) void {
+ genericLoad(u16, ptr, "const_vals2");
}
-export fn fuzzer_set_name(name_ptr: [*]const u8, name_len: usize) void {
- fuzzer.unit_test_name = name_ptr[0..name_len];
+export fn __sanitizer_cov_load4(ptr: *align(1) const u32) void {
+ genericLoad(u32, ptr, "const_vals4");
}
-export fn fuzzer_init(cache_dir_struct: Fuzzer.Slice) void {
- // Linkers are expected to automatically add `__start_<section>` and
- // `__stop_<section>` symbols when section names are valid C identifiers.
-
- const ofmt = builtin.object_format;
-
- const start_symbol_prefix: []const u8 = if (ofmt == .macho)
- "\x01section$start$__DATA$__"
- else
- "__start___";
- const end_symbol_prefix: []const u8 = if (ofmt == .macho)
- "\x01section$end$__DATA$__"
- else
- "__stop___";
-
- const pc_counters_start_name = start_symbol_prefix ++ "sancov_cntrs";
- const pc_counters_start = @extern([*]u8, .{
- .name = pc_counters_start_name,
- .linkage = .weak,
- }) orelse fatal("missing {s} symbol", .{pc_counters_start_name});
-
- const pc_counters_end_name = end_symbol_prefix ++ "sancov_cntrs";
- const pc_counters_end = @extern([*]u8, .{
- .name = pc_counters_end_name,
- .linkage = .weak,
- }) orelse fatal("missing {s} symbol", .{pc_counters_end_name});
-
- const pc_counters = pc_counters_start[0 .. pc_counters_end - pc_counters_start];
-
- const pcs_start_name = start_symbol_prefix ++ "sancov_pcs1";
- const pcs_start = @extern([*]usize, .{
- .name = pcs_start_name,
- .linkage = .weak,
- }) orelse fatal("missing {s} symbol", .{pcs_start_name});
-
- const pcs_end_name = end_symbol_prefix ++ "sancov_pcs1";
- const pcs_end = @extern([*]usize, .{
- .name = pcs_end_name,
- .linkage = .weak,
- }) orelse fatal("missing {s} symbol", .{pcs_end_name});
-
- const pcs = pcs_start[0 .. pcs_end - pcs_start];
-
- const cache_dir_path = cache_dir_struct.toZig();
- const cache_dir = if (cache_dir_path.len == 0)
- std.fs.cwd()
- else
- std.fs.cwd().makeOpenPath(cache_dir_path, .{ .iterate = true }) catch |err| {
- fatal("unable to open fuzz directory '{s}': {s}", .{ cache_dir_path, @errorName(err) });
- };
+export fn __sanitizer_cov_load8(ptr: *align(1) const u64) void {
+ genericLoad(u64, ptr, "const_vals8");
+}
+
+export fn __sanitizer_cov_load16(ptr: *align(1) const u128) void {
+ genericLoad(u128, ptr, "const_vals16");
+}
+
+export fn __sanitizer_cov_trace_cmp1(arg1: u8, arg2: u8) void {
+ _ = arg1;
+ _ = arg2;
+}
+
+export fn __sanitizer_cov_trace_cmp2(arg1: u16, arg2: u16) void {
+ _ = arg1;
+ _ = arg2;
+}
+
+export fn __sanitizer_cov_trace_cmp4(arg1: u32, arg2: u32) void {
+ _ = arg1;
+ _ = arg2;
+}
+
+export fn __sanitizer_cov_trace_cmp8(arg1: u64, arg2: u64) void {
+ _ = arg1;
+ _ = arg2;
+}
+
+export fn __sanitizer_cov_trace_pc_indir(callee: usize) void {
+ // Not valuable because we already have pc tracing via 8bit counters.
+ _ = callee;
+}
+export fn __sanitizer_cov_8bit_counters_init(start: usize, end: usize) void {
+ // clang will emit a call to this function when compiling with code coverage instrumentation.
+ // however, fuzzer_init() does not need this information since it directly reads from the
+ // symbol table.
+ _ = start;
+ _ = end;
+}
+export fn __sanitizer_cov_pcs_init(start: usize, end: usize) void {
+ // clang will emit a call to this function when compiling with code coverage instrumentation.
+ // however, fuzzer_init() does not need this information since it directly reads from the
+ // symbol table.
+ _ = start;
+ _ = end;
+}
- fuzzer.init(cache_dir, pc_counters, pcs) catch |err|
- fatal("unable to init fuzzer: {s}", .{@errorName(err)});
+/// Copy all of source into dest at position 0.
+/// If the slices overlap, dest.ptr must be <= src.ptr.
+fn volatileCopyForwards(comptime T: type, dest: []volatile T, source: []const volatile T) void {
+ for (dest, source) |*d, s| d.* = s;
}
-export fn fuzzer_init_corpus_elem(input_ptr: [*]const u8, input_len: usize) void {
- fuzzer.addCorpusElem(input_ptr[0..input_len]) catch |err|
- fatal("failed to add corpus element: {s}", .{@errorName(err)});
+/// Copy all of source into dest at position 0.
+/// If the slices overlap, dest.ptr must be >= src.ptr.
+fn volatileCopyBackwards(comptime T: type, dest: []volatile T, source: []const volatile T) void {
+ var i = source.len;
+ while (i > 0) {
+ i -= 1;
+ dest[i] = source[i];
+ }
}
+const Mutation = enum {
+ /// Applies to .insert_*_span and .push_*_span
+ /// For WTF-8, this limits code units, not code points
+ const max_insert_len = 12;
+ /// Applies to .insert_large_*_span and .push_large_*_span
+ /// 4096 is used as it is a common sector size
+ const max_large_insert_len = 4096;
+ /// Applies to .delete_span and .pop_span
+ const max_delete_len = 16;
+ /// Applies to .set_*_span, .move_span, and .set_existing_span
+ const max_set_len = 12;
+ const max_replicate_len = 64;
+ const AddValue = i6;
+ const SmallValue = i10;
+
+ delete_byte,
+ delete_span,
+ /// Removes the last byte from the input
+ pop_byte,
+ pop_span,
+ /// Inserts a group of bytes which is already in the input and removes the original copy.
+ move_span,
+ /// Replaces a group of bytes in the input with another group of bytes in the input
+ set_existing_span,
+ insert_existing_span,
+ push_existing_span,
+ set_rng_byte,
+ set_rng_span,
+ insert_rng_byte,
+ insert_rng_span,
+ /// Adds a byte to the end of the input
+ push_rng_byte,
+ push_rng_span,
+ set_zero_byte,
+ set_zero_span,
+ insert_zero_byte,
+ insert_zero_span,
+ push_zero_byte,
+ push_zero_span,
+ /// Appends a large run of zeros to the end of the input
+ /// This is intended to work with fuzz tests that require data in (large) blocks
+ push_large_zero_span,
+ /// Inserts a group of ASCII printable characters
+ insert_print_span,
+ /// Inserts a group of characters from a...z, A...Z, 0...9, _, and ' '
+ insert_common_span,
+ /// Inserts a group of ASCII digits possibly preceded by a `-`
+ insert_integer,
+ /// Code units are evenly distributed between one and four
+ insert_wtf8_char,
+ insert_wtf8_span,
+ /// Inserts a group of bytes from another input
+ insert_splice_span,
+ // utf16 is not yet included since insertion of random bytes should adequately check
+ // BMP characters, surrogate handling, and occasionally characters outside of the BMP.
+ set_print_span,
+ set_common_span,
+ set_splice_span,
+ /// Similar to set_splice_span, but the bytes are copied to the same index instead of a random one
+ replicate_splice_span,
+ push_print_span,
+ push_common_span,
+ push_integer,
+ push_wtf8_char,
+ push_wtf8_span,
+ push_splice_span,
+ /// Clears a random number of high bits of a byte
+ truncate_8,
+ truncate_16le,
+ truncate_16be,
+ truncate_32le,
+ truncate_32be,
+ truncate_64le,
+ truncate_64be,
+ /// Flips a random bit
+ xor_1,
+ /// Flips up to three bits of a byte, biased toward fewer bits
+ xor_few_8,
+ /// Flips up to six bits of a 16-bit value, biased toward fewer bits
+ xor_few_16,
+ /// Flips up to nine bits of a 32-bit value, biased toward fewer bits
+ xor_few_32,
+ /// Flips up to twelve bits of a 64-bit value, biased toward fewer bits
+ xor_few_64,
+ /// Adds a value of type AddValue to a byte
+ add_8,
+ add_16le,
+ add_16be,
+ add_32le,
+ add_32be,
+ add_64le,
+ add_64be,
+ /// Sets a 16-bit little-endian value to a value of type SmallValue
+ set_small_16le,
+ set_small_16be,
+ set_small_32le,
+ set_small_32be,
+ set_small_64le,
+ set_small_64be,
+ insert_small_16le,
+ insert_small_16be,
+ insert_small_32le,
+ insert_small_32be,
+ insert_small_64le,
+ insert_small_64be,
+ push_small_16le,
+ push_small_16be,
+ push_small_32le,
+ push_small_32be,
+ push_small_64le,
+ push_small_64be,
+ set_const_16,
+ set_const_32,
+ set_const_64,
+ set_const_128,
+ insert_const_16,
+ insert_const_32,
+ insert_const_64,
+ insert_const_128,
+ push_const_16,
+ push_const_32,
+ push_const_64,
+ push_const_128,
+ /// Sets a byte with up to three bits set, biased toward fewer bits
+ set_few_8,
+ /// Sets a 16-bit value with up to six bits set, biased toward fewer bits
+ set_few_16,
+ /// Sets a 32-bit value with up to nine bits set, biased toward fewer bits
+ set_few_32,
+ /// Sets a 64-bit value with up to twelve bits set, biased toward fewer bits
+ set_few_64,
+ insert_few_8,
+ insert_few_16,
+ insert_few_32,
+ insert_few_64,
+ push_few_8,
+ push_few_16,
+ push_few_32,
+ push_few_64,
+ /// Randomizes a random contiguous group of bits in a byte
+ packed_set_rng_8,
+ packed_set_rng_16le,
+ packed_set_rng_16be,
+ packed_set_rng_32le,
+ packed_set_rng_32be,
+ packed_set_rng_64le,
+ packed_set_rng_64be,
+
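+ /// Returns a value of type T with at least one and at most `bits` bits set;
+ /// repeated draws may land on the same bit position, which further biases
+ /// the result toward sparse values.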
+ fn fewValue(rng: std.Random, T: type, comptime bits: u16) T {
+ var result: T = 0;
+ var remaining_bits = rng.intRangeAtMostBiased(u16, 1, bits);
+ while (remaining_bits > 0) {
+ result |= @shlExact(@as(T, 1), rng.int(math.Log2Int(T)));
+ remaining_bits -= 1;
+ }
+ return result;
+ }
+
+ /// Returns whether the mutation was applicable to the input
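+ ///
+ /// A minimal usage sketch (hypothetical caller; `rng`, `input`, `out`, the
+ /// corpus, and the constant tables are assumed to be in scope):
+ ///
+ ///   var m = rng.enumValue(Mutation);
+ ///   while (!m.mutate(rng, input, &out, corpus, c2, c4, c8, c16))
+ ///       m = rng.enumValue(Mutation);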
+ pub fn mutate(
+ mutation: Mutation,
+ rng: std.Random,
+ in: []const u8,
+ out: *MemoryMappedList,
+ corpus: []const []const u8,
+ const_vals2: []const u16,
+ const_vals4: []const u32,
+ const_vals8: []const u64,
+ const_vals16: []const u128,
+ ) bool {
+ out.clearRetainingCapacity();
+ const new_capacity = 8 + in.len + @max(
+ 16, // 16-byte (128-bit) constant values
+ Mutation.max_insert_len,
+ Mutation.max_large_insert_len,
+ );
+ out.ensureTotalCapacity(new_capacity) catch |e|
+ panic("could not resize shared input file: {t}", .{e});
+ out.items.len = 8; // Length field
+
+ const applied = switch (mutation) {
+ inline else => |m| m.comptimeMutate(
+ rng,
+ in,
+ out,
+ corpus,
+ const_vals2,
+ const_vals4,
+ const_vals8,
+ const_vals16,
+ ),
+ };
+ if (!applied)
+ assert(out.items.len == 8)
+ else
+ assert(out.items.len <= new_capacity);
+ return applied;
+ }
+
+ /// Assumes out has already been cleared
+ fn comptimeMutate(
+ comptime mutation: Mutation,
+ rng: std.Random,
+ in: []const u8,
+ out: *MemoryMappedList,
+ corpus: []const []const u8,
+ const_vals2: []const u16,
+ const_vals4: []const u32,
+ const_vals8: []const u64,
+ const_vals16: []const u128,
+ ) bool {
+ const Class = enum { new, remove, rmw, move_span, replicate_splice_span };
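+ // For the .new class the context tuple is (op, minimum input length, data
+ // kind, data-kind-specific context); the other classes carry only what
+ // their handlers below destructure.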
+ const class: Class, const class_ctx = switch (mutation) {
+ // zig fmt: off
+ .move_span => .{ .move_span, null },
+ .replicate_splice_span => .{ .replicate_splice_span, null },
+
+ .delete_byte => .{ .remove, .{ .delete, 1 } },
+ .delete_span => .{ .remove, .{ .delete, max_delete_len } },
+
+ .pop_byte => .{ .remove, .{ .pop, 1 } },
+ .pop_span => .{ .remove, .{ .pop, max_delete_len } },
+
+ .set_rng_byte => .{ .new, .{ .set , 1, .rng , .one } },
+ .set_zero_byte => .{ .new, .{ .set , 1, .zero , .one } },
+ .set_rng_span => .{ .new, .{ .set , 1, .rng , .many } },
+ .set_zero_span => .{ .new, .{ .set , 1, .zero , .many } },
+ .set_common_span => .{ .new, .{ .set , 1, .common , .many } },
+ .set_print_span => .{ .new, .{ .set , 1, .print , .many } },
+ .set_existing_span => .{ .new, .{ .set , 2, .existing, .many } },
+ .set_splice_span => .{ .new, .{ .set , 1, .splice , .many } },
+ .set_const_16 => .{ .new, .{ .set , 2, .@"const", const_vals2 } },
+ .set_const_32 => .{ .new, .{ .set , 4, .@"const", const_vals4 } },
+ .set_const_64 => .{ .new, .{ .set , 8, .@"const", const_vals8 } },
+ .set_const_128 => .{ .new, .{ .set , 16, .@"const", const_vals16 } },
+ .set_small_16le => .{ .new, .{ .set , 2, .small , .{ i16, .little } } },
+ .set_small_32le => .{ .new, .{ .set , 4, .small , .{ i32, .little } } },
+ .set_small_64le => .{ .new, .{ .set , 8, .small , .{ i64, .little } } },
+ .set_small_16be => .{ .new, .{ .set , 2, .small , .{ i16, .big } } },
+ .set_small_32be => .{ .new, .{ .set , 4, .small , .{ i32, .big } } },
+ .set_small_64be => .{ .new, .{ .set , 8, .small , .{ i64, .big } } },
+ .set_few_8 => .{ .new, .{ .set , 1, .few , .{ u8 , 3 } } },
+ .set_few_16 => .{ .new, .{ .set , 2, .few , .{ u16, 6 } } },
+ .set_few_32 => .{ .new, .{ .set , 4, .few , .{ u32, 9 } } },
+ .set_few_64 => .{ .new, .{ .set , 8, .few , .{ u64, 12 } } },
+
+ .insert_rng_byte => .{ .new, .{ .insert, 0, .rng , .one } },
+ .insert_zero_byte => .{ .new, .{ .insert, 0, .zero , .one } },
+ .insert_rng_span => .{ .new, .{ .insert, 0, .rng , .many } },
+ .insert_zero_span => .{ .new, .{ .insert, 0, .zero , .many } },
+ .insert_print_span => .{ .new, .{ .insert, 0, .print , .many } },
+ .insert_common_span => .{ .new, .{ .insert, 0, .common , .many } },
+ .insert_integer => .{ .new, .{ .insert, 0, .integer , .many } },
+ .insert_wtf8_char => .{ .new, .{ .insert, 0, .wtf8 , .one } },
+ .insert_wtf8_span => .{ .new, .{ .insert, 0, .wtf8 , .many } },
+ .insert_existing_span => .{ .new, .{ .insert, 1, .existing, .many } },
+ .insert_splice_span => .{ .new, .{ .insert, 0, .splice , .many } },
+ .insert_const_16 => .{ .new, .{ .insert, 0, .@"const", const_vals2 } },
+ .insert_const_32 => .{ .new, .{ .insert, 0, .@"const", const_vals4 } },
+ .insert_const_64 => .{ .new, .{ .insert, 0, .@"const", const_vals8 } },
+ .insert_const_128 => .{ .new, .{ .insert, 0, .@"const", const_vals16 } },
+ .insert_small_16le => .{ .new, .{ .insert, 0, .small , .{ i16, .little } } },
+ .insert_small_32le => .{ .new, .{ .insert, 0, .small , .{ i32, .little } } },
+ .insert_small_64le => .{ .new, .{ .insert, 0, .small , .{ i64, .little } } },
+ .insert_small_16be => .{ .new, .{ .insert, 0, .small , .{ i16, .big } } },
+ .insert_small_32be => .{ .new, .{ .insert, 0, .small , .{ i32, .big } } },
+ .insert_small_64be => .{ .new, .{ .insert, 0, .small , .{ i64, .big } } },
+ .insert_few_8 => .{ .new, .{ .insert, 0, .few , .{ u8 , 3 } } },
+ .insert_few_16 => .{ .new, .{ .insert, 0, .few , .{ u16, 6 } } },
+ .insert_few_32 => .{ .new, .{ .insert, 0, .few , .{ u32, 9 } } },
+ .insert_few_64 => .{ .new, .{ .insert, 0, .few , .{ u64, 12 } } },
+
+ .push_rng_byte => .{ .new, .{ .push , 0, .rng , .one } },
+ .push_zero_byte => .{ .new, .{ .push , 0, .zero , .one } },
+ .push_rng_span => .{ .new, .{ .push , 0, .rng , .many } },
+ .push_zero_span => .{ .new, .{ .push , 0, .zero , .many } },
+ .push_print_span => .{ .new, .{ .push , 0, .print , .many } },
+ .push_common_span => .{ .new, .{ .push , 0, .common , .many } },
+ .push_integer => .{ .new, .{ .push , 0, .integer , .many } },
+ .push_large_zero_span => .{ .new, .{ .push , 0, .zero , .large } },
+ .push_wtf8_char => .{ .new, .{ .push , 0, .wtf8 , .one } },
+ .push_wtf8_span => .{ .new, .{ .push , 0, .wtf8 , .many } },
+ .push_existing_span => .{ .new, .{ .push , 1, .existing, .many } },
+ .push_splice_span => .{ .new, .{ .push , 0, .splice , .many } },
+ .push_const_16 => .{ .new, .{ .push , 0, .@"const", const_vals2 } },
+ .push_const_32 => .{ .new, .{ .push , 0, .@"const", const_vals4 } },
+ .push_const_64 => .{ .new, .{ .push , 0, .@"const", const_vals8 } },
+ .push_const_128 => .{ .new, .{ .push , 0, .@"const", const_vals16 } },
+ .push_small_16le => .{ .new, .{ .push , 0, .small , .{ i16, .little } } },
+ .push_small_32le => .{ .new, .{ .push , 0, .small , .{ i32, .little } } },
+ .push_small_64le => .{ .new, .{ .push , 0, .small , .{ i64, .little } } },
+ .push_small_16be => .{ .new, .{ .push , 0, .small , .{ i16, .big } } },
+ .push_small_32be => .{ .new, .{ .push , 0, .small , .{ i32, .big } } },
+ .push_small_64be => .{ .new, .{ .push , 0, .small , .{ i64, .big } } },
+ .push_few_8 => .{ .new, .{ .push , 0, .few , .{ u8 , 3 } } },
+ .push_few_16 => .{ .new, .{ .push , 0, .few , .{ u16, 6 } } },
+ .push_few_32 => .{ .new, .{ .push , 0, .few , .{ u32, 9 } } },
+ .push_few_64 => .{ .new, .{ .push , 0, .few , .{ u64, 12 } } },
+
+ .xor_1 => .{ .rmw, .{ .xor , u8 , native_endian, 1 } },
+ .xor_few_8 => .{ .rmw, .{ .xor , u8 , native_endian, 3 } },
+ .xor_few_16 => .{ .rmw, .{ .xor , u16, native_endian, 6 } },
+ .xor_few_32 => .{ .rmw, .{ .xor , u32, native_endian, 9 } },
+ .xor_few_64 => .{ .rmw, .{ .xor , u64, native_endian, 12 } },
+
+ .truncate_8 => .{ .rmw, .{ .truncate , u8 , native_endian, {} } },
+ .truncate_16le => .{ .rmw, .{ .truncate , u16, .little , {} } },
+ .truncate_32le => .{ .rmw, .{ .truncate , u32, .little , {} } },
+ .truncate_64le => .{ .rmw, .{ .truncate , u64, .little , {} } },
+ .truncate_16be => .{ .rmw, .{ .truncate , u16, .big , {} } },
+ .truncate_32be => .{ .rmw, .{ .truncate , u32, .big , {} } },
+ .truncate_64be => .{ .rmw, .{ .truncate , u64, .big , {} } },
+
+ .add_8 => .{ .rmw, .{ .add , i8 , native_endian, {} } },
+ .add_16le => .{ .rmw, .{ .add , i16, .little , {} } },
+ .add_32le => .{ .rmw, .{ .add , i32, .little , {} } },
+ .add_64le => .{ .rmw, .{ .add , i64, .little , {} } },
+ .add_16be => .{ .rmw, .{ .add , i16, .big , {} } },
+ .add_32be => .{ .rmw, .{ .add , i32, .big , {} } },
+ .add_64be => .{ .rmw, .{ .add , i64, .big , {} } },
+
+ .packed_set_rng_8 => .{ .rmw, .{ .packed_rng, u8 , native_endian, {} } },
+ .packed_set_rng_16le => .{ .rmw, .{ .packed_rng, u16, .little , {} } },
+ .packed_set_rng_32le => .{ .rmw, .{ .packed_rng, u32, .little , {} } },
+ .packed_set_rng_64le => .{ .rmw, .{ .packed_rng, u64, .little , {} } },
+ .packed_set_rng_16be => .{ .rmw, .{ .packed_rng, u16, .big , {} } },
+ .packed_set_rng_32be => .{ .rmw, .{ .packed_rng, u32, .big , {} } },
+ .packed_set_rng_64be => .{ .rmw, .{ .packed_rng, u64, .big , {} } },
+ // zig fmt: on
+ };
+
+ switch (class) {
+ .new => {
+ const op: enum {
+ set,
+ insert,
+ push,
+
+ pub fn maxLen(comptime op: @This(), in_len: usize) usize {
+ return switch (op) {
+ .set => @min(in_len, max_set_len),
+ .insert, .push => max_insert_len,
+ };
+ }
+ }, const min_in_len, const data: enum {
+ rng,
+ zero,
+ common,
+ print,
+ integer,
+ wtf8,
+ existing,
+ splice,
+ @"const",
+ small,
+ few,
+ }, const data_ctx = class_ctx;
+ const Size = enum { one, many, large };
+ if (in.len < min_in_len) return false;
+ if (data == .@"const" and data_ctx.len == 0) return false;
+
+ const splice_i = if (data == .splice) blk: {
+ // Element zero always holds an empty input, so we do not select it
+ if (corpus.len == 1) return false;
+ break :blk rng.intRangeLessThanBiased(usize, 1, corpus.len);
+ } else undefined;
+
+ // Only .set needs to honor this length exactly; .wtf8 emits a varying number of bytes per code point
+ const len = switch (data) {
+ else => switch (@as(Size, data_ctx)) {
+ .one => 1,
+ .many => rng.intRangeAtMostBiased(usize, 1, op.maxLen(in.len)),
+ .large => rng.intRangeAtMostBiased(usize, 1, max_large_insert_len),
+ },
+ .wtf8 => undefined, // varies by size of each code unit
+ .splice => rng.intRangeAtMostBiased(usize, 1, @min(
+ corpus[splice_i].len,
+ op.maxLen(in.len),
+ )),
+ .existing => rng.intRangeAtMostBiased(usize, 1, @min(
+ in.len,
+ op.maxLen(in.len),
+ )),
+ .@"const" => @sizeOf(@typeInfo(@TypeOf(data_ctx)).pointer.child),
+ .small, .few => @sizeOf(data_ctx[0]),
+ };
+
+ const i = switch (op) {
+ .set => rng.uintAtMostBiased(usize, in.len - len),
+ .insert => rng.uintAtMostBiased(usize, in.len),
+ .push => in.len,
+ };
+
+ out.appendSliceAssumeCapacity(in[0..i]);
+ switch (data) {
+ .rng => {
+ var bytes: [@max(max_insert_len, max_set_len)]u8 = undefined;
+ rng.bytes(bytes[0..len]);
+ out.appendSliceAssumeCapacity(bytes[0..len]);
+ },
+ .zero => out.appendNTimesAssumeCapacity(0, len),
+ .common => for (out.addManyAsSliceAssumeCapacity(len)) |*c| {
+ c.* = switch (rng.int(u6)) {
+ 0 => ' ',
+ 1...10 => |x| '0' + (@as(u8, x) - 1),
+ 11...36 => |x| 'A' + (@as(u8, x) - 11),
+ 37 => '_',
+ 38...63 => |x| 'a' + (@as(u8, x) - 38),
+ };
+ },
+ .print => for (out.addManyAsSliceAssumeCapacity(len)) |*c| {
+ c.* = rng.intRangeAtMostBiased(u8, 0x20, 0x7E);
+ },
+ .integer => {
+ const negative = len != 0 and rng.boolean();
+ if (negative) {
+ out.appendAssumeCapacity('-');
+ }
+
+ for (out.addManyAsSliceAssumeCapacity(len - @intFromBool(negative))) |*c| {
+ c.* = rng.intRangeAtMostBiased(u8, '0', '9');
+ }
+ },
+ .wtf8 => {
+ comptime assert(op != .set);
+ var codepoints: usize = if (data_ctx == .one)
+ 1
+ else
+ rng.intRangeAtMostBiased(usize, 1, Mutation.max_insert_len / 4);
+
+ while (true) {
+ const units1 = rng.int(u2);
+ const value = switch (units1) {
+ 0 => rng.int(u7),
+ 1 => rng.intRangeAtMostBiased(u11, 0x000080, 0x0007FF),
+ 2 => rng.intRangeAtMostBiased(u16, 0x000800, 0x00FFFF),
+ 3 => rng.intRangeAtMostBiased(u21, 0x010000, 0x10FFFF),
+ };
+ const units = @as(u3, units1) + 1;
+
+ var buf: [4]u8 = undefined;
+ assert(std.unicode.wtf8Encode(value, &buf) catch unreachable == units);
+ out.appendSliceAssumeCapacity(buf[0..units]);
+
+ codepoints -= 1;
+ if (codepoints == 0) break;
+ }
+ },
+ .existing => {
+ const j = rng.uintAtMostBiased(usize, in.len - len);
+ out.appendSliceAssumeCapacity(in[j..][0..len]);
+ },
+ .splice => {
+ const j = rng.uintAtMostBiased(usize, corpus[splice_i].len - len);
+ out.appendSliceAssumeCapacity(corpus[splice_i][j..][0..len]);
+ },
+ .@"const" => out.appendSliceAssumeCapacity(mem.asBytes(
+ &data_ctx[rng.uintLessThanBiased(usize, data_ctx.len)],
+ )),
+ .small => out.appendSliceAssumeCapacity(mem.asBytes(
+ &mem.nativeTo(data_ctx[0], rng.int(SmallValue), data_ctx[1]),
+ )),
+ .few => out.appendSliceAssumeCapacity(mem.asBytes(
+ &fewValue(rng, data_ctx[0], data_ctx[1]),
+ )),
+ }
+ switch (op) {
+ .set => out.appendSliceAssumeCapacity(in[i + len ..]),
+ .insert => out.appendSliceAssumeCapacity(in[i..]),
+ .push => {},
+ }
+ },
+ .remove => {
+ if (in.len == 0) return false;
+ const Op = enum { delete, pop };
+ const op: Op, const max_len = class_ctx;
+ // LessThan is used so we don't delete the entire input (which is unproductive since
+ // an empty input has always been tried)
+ const len = if (max_len == 1) 1 else rng.uintLessThanBiased(
+ usize,
+ @min(max_len + 1, in.len),
+ );
+ switch (op) {
+ .delete => {
+ const i = rng.uintAtMostBiased(usize, in.len - len);
+ out.appendSliceAssumeCapacity(in[0..i]);
+ out.appendSliceAssumeCapacity(in[i + len ..]);
+ },
+ .pop => out.appendSliceAssumeCapacity(in[0 .. in.len - len]),
+ }
+ },
+ .rmw => {
+ const Op = enum { xor, truncate, add, packed_rng };
+ const op: Op, const T, const endian, const xor_bits = class_ctx;
+ if (in.len < @sizeOf(T)) return false;
+ const Log2T = math.Log2Int(T);
+
+ const idx = rng.uintAtMostBiased(usize, in.len - @sizeOf(T));
+ const old = mem.readInt(T, in[idx..][0..@sizeOf(T)], endian);
+ const new = switch (op) {
+ .xor => old ^ fewValue(rng, T, xor_bits),
+ .truncate => old & (@as(T, math.maxInt(T)) >> rng.int(Log2T)),
+ .add => old +% addend: {
+ const val = rng.int(Mutation.AddValue);
+ break :addend if (val == 0) 1 else val;
+ },
+ .packed_rng => blk: {
+ const bits = rng.int(math.Log2Int(T)) +| 1;
+ break :blk old ^ (rng.int(T) >> bits << rng.uintAtMostBiased(Log2T, bits));
+ },
+ };
+ out.appendSliceAssumeCapacity(in);
+ mem.bytesAsValue(T, out.items[8..][idx..][0..@sizeOf(T)]).* =
+ mem.nativeTo(T, new, endian);
+ },
+ .move_span => {
+ if (in.len < 2) return false;
+ // One less since moving the whole input will never change anything
+ const len = rng.intRangeAtMostBiased(usize, 1, @min(
+ in.len - 1,
+ Mutation.max_set_len,
+ ));
+
+ const src = rng.uintAtMostBiased(usize, in.len - len);
+ // This indexes into the final input
+ const dst = blk: {
+ const res = rng.uintAtMostBiased(usize, in.len - len - 1);
+ break :blk res + @intFromBool(res >= src);
+ };
+
+ if (src < dst) {
+ out.appendSliceAssumeCapacity(in[0..src]);
+ out.appendSliceAssumeCapacity(in[src + len .. dst + len]);
+ out.appendSliceAssumeCapacity(in[src..][0..len]);
+ out.appendSliceAssumeCapacity(in[dst + len ..]);
+ } else {
+ out.appendSliceAssumeCapacity(in[0..dst]);
+ out.appendSliceAssumeCapacity(in[src..][0..len]);
+ out.appendSliceAssumeCapacity(in[dst..src]);
+ out.appendSliceAssumeCapacity(in[src + len ..]);
+ }
+ },
+ .replicate_splice_span => {
+ if (in.len == 0) return false;
+ if (corpus.len == 1) return false;
+ const from = corpus[rng.intRangeLessThanBiased(usize, 1, corpus.len)];
+ const len = rng.uintLessThanBiased(usize, @min(in.len, from.len, max_replicate_len));
+ const i = rng.uintAtMostBiased(usize, @min(in.len, from.len) - len);
+ out.appendSliceAssumeCapacity(in[0..i]);
+ out.appendSliceAssumeCapacity(from[i..][0..len]);
+ out.appendSliceAssumeCapacity(in[i + len ..]);
+ },
+ }
+ return true;
+ }
+};
+
/// Like `std.ArrayListUnmanaged(u8)` but backed by memory mapping.
pub const MemoryMappedList = struct {
/// Contents of the list.
@@ -654,8 +1587,23 @@ pub const MemoryMappedList = struct {
fn growCapacity(current: usize, minimum: usize) usize {
var new = current;
while (true) {
- new = std.mem.alignForward(usize, new + new / 2, std.heap.page_size_max);
+ new = mem.alignForward(usize, new + new / 2, std.heap.page_size_max);
if (new >= minimum) return new;
}
}
+
+ pub fn insertAssumeCapacity(l: *MemoryMappedList, i: usize, item: u8) void {
+ assert(l.items.len + 1 <= l.capacity);
+ l.items.len += 1;
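+ // The destination overlaps the source shifted one byte to the right, so a
+ // backwards copy is required to avoid clobbering bytes not yet read.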
+ volatileCopyBackwards(u8, l.items[i + 1 ..], l.items[i .. l.items.len - 1]);
+ l.items[i] = item;
+ }
+
+ pub fn orderedRemove(l: *MemoryMappedList, i: usize) u8 {
+ assert(i < l.items.len);
+ const old = l.items[i];
+ volatileCopyForwards(u8, l.items[i .. l.items.len - 1], l.items[i + 1 ..]);
+ l.items.len -= 1;
+ return old;
+ }
};
src/codegen/llvm.zig
@@ -1115,7 +1115,7 @@ pub const Object = struct {
.NoPrune = false,
// Workaround for https://github.com/llvm/llvm-project/pull/106464
.StackDepth = true,
- .TraceLoads = false,
+ .TraceLoads = options.fuzz,
.TraceStores = false,
.CollectControlFlow = false,
},
test/standalone/libfuzzer/build.zig
@@ -16,6 +16,7 @@ pub fn build(b: *std.Build) void {
.optimize = optimize,
.fuzz = true,
}),
+ .use_llvm = true, // #23423
});
b.installArtifact(exe);
test/standalone/libfuzzer/main.zig
@@ -1,29 +1,43 @@
const std = @import("std");
+const abi = std.Build.abi.fuzz;
+const native_endian = @import("builtin").cpu.arch.endian();
-const FuzzerSlice = extern struct {
- ptr: [*]const u8,
- len: usize,
+fn testOne(in: abi.Slice) callconv(.c) void {
+ std.debug.assertReadable(in.toSlice());
+}
- fn fromSlice(s: []const u8) FuzzerSlice {
- return .{ .ptr = s.ptr, .len = s.len };
- }
-};
+pub fn main() !void {
+ var debug_gpa_ctx: std.heap.DebugAllocator(.{}) = .init;
+ defer _ = debug_gpa_ctx.deinit();
+ const gpa = debug_gpa_ctx.allocator();
-extern fn fuzzer_set_name(name_ptr: [*]const u8, name_len: usize) void;
-extern fn fuzzer_init(cache_dir: FuzzerSlice) void;
-extern fn fuzzer_init_corpus_elem(input_ptr: [*]const u8, input_len: usize) void;
-extern fn fuzzer_coverage_id() u64;
+ var args = try std.process.argsWithAllocator(gpa);
+ defer args.deinit();
+ _ = args.skip(); // executable name
-pub fn main() !void {
- var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
- defer _ = gpa.deinit();
- const args = try std.process.argsAlloc(gpa.allocator());
- defer std.process.argsFree(gpa.allocator(), args);
+ const cache_dir_path = args.next() orelse @panic("expected cache directory path argument");
+ var cache_dir = try std.fs.cwd().openDir(cache_dir_path, .{});
+ defer cache_dir.close();
+
+ abi.fuzzer_init(.fromSlice(cache_dir_path));
+ abi.fuzzer_init_test(testOne, .fromSlice("test"));
+ abi.fuzzer_new_input(.fromSlice(""));
+ abi.fuzzer_new_input(.fromSlice("hello"));
+
+ const pc_digest = abi.fuzzer_coverage_id();
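+ // The coverage file is expected under "v/<hex digest>" inside the cache
+ // directory passed to fuzzer_init.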
+ const coverage_file_path = "v/" ++ std.fmt.hex(pc_digest);
+ const coverage_file = try cache_dir.openFile(coverage_file_path, .{});
+ defer coverage_file.close();
- const cache_dir = args[1];
+ var read_buf: [@sizeOf(abi.SeenPcsHeader)]u8 = undefined;
+ var r = coverage_file.reader(&read_buf);
+ const pcs_header = r.interface.takeStruct(abi.SeenPcsHeader, native_endian) catch return r.err.?;
- fuzzer_init(FuzzerSlice.fromSlice(cache_dir));
- fuzzer_init_corpus_elem("hello".ptr, "hello".len);
- fuzzer_set_name("test".ptr, "test".len);
- _ = fuzzer_coverage_id();
+ if (pcs_header.pcs_len == 0)
+ return error.ZeroPcs;
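+ // Expected layout: the SeenPcsHeader, then a seen-PC bit set of pcs_len bits
+ // rounded up to whole usize words, then pcs_len program counter addresses.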
+ const expected_len = @sizeOf(abi.SeenPcsHeader) +
+ try std.math.divCeil(usize, pcs_header.pcs_len, @bitSizeOf(usize)) * @sizeOf(usize) +
+ pcs_header.pcs_len * @sizeOf(usize);
+ if (try coverage_file.getEndPos() != expected_len)
+ return error.WrongEnd;
}