1const builtin = @import("builtin");
2const std = @import("std");
3const fatal = std.process.fatal;
4const mem = std.mem;
5const math = std.math;
6const Allocator = mem.Allocator;
7const assert = std.debug.assert;
8const panic = std.debug.panic;
9const abi = std.Build.abi.fuzz;
10const native_endian = builtin.cpu.arch.endian();
11
12pub const std_options = std.Options{
13 .logFn = logOverride,
14};
15
16fn logOverride(
17 comptime level: std.log.Level,
18 comptime scope: @EnumLiteral(),
19 comptime format: []const u8,
20 args: anytype,
21) void {
22 const f = log_f orelse
23 panic("attempt to use log before initialization, message:\n" ++ format, args);
24 f.lock(.exclusive) catch |e| panic("failed to lock logging file: {t}", .{e});
25 defer f.unlock();
26
27 var buf: [256]u8 = undefined;
28 var fw = f.writer(&buf);
29 const end = f.getEndPos() catch |e| panic("failed to get fuzzer log file end: {t}", .{e});
30 fw.seekTo(end) catch |e| panic("failed to seek to fuzzer log file end: {t}", .{e});
31
32 const prefix1 = comptime level.asText();
33 const prefix2 = if (scope == .default) ": " else "(" ++ @tagName(scope) ++ "): ";
34 fw.interface.print(
35 "[{s}] " ++ prefix1 ++ prefix2 ++ format ++ "\n",
36 .{current_test_name orelse "setup"} ++ args,
37 ) catch panic("failed to write to fuzzer log: {t}", .{fw.err.?});
38 fw.interface.flush() catch panic("failed to write to fuzzer log: {t}", .{fw.err.?});
39}
40
41var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
42const gpa = switch (builtin.mode) {
43 .Debug => debug_allocator.allocator(),
44 .ReleaseFast, .ReleaseSmall, .ReleaseSafe => std.heap.smp_allocator,
45};
46
/// Part of `exec`, but kept separate so that it can be set before `exec` is initialized.
48var log_f: ?std.fs.File = null;
49var exec: Executable = .preinit;
50var inst: Instrumentation = .preinit;
51var fuzzer: Fuzzer = undefined;
52var current_test_name: ?[]const u8 = null;
53
54fn bitsetUsizes(elems: usize) usize {
55 return math.divCeil(usize, elems, @bitSizeOf(usize)) catch unreachable;
56}
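
// A hedged sanity check of the rounding above (illustrative only); it is phrased in terms of
// `@bitSizeOf(usize)` so it holds for any target word size.
test bitsetUsizes {
    try std.testing.expect(bitsetUsizes(1) == 1);
    try std.testing.expect(bitsetUsizes(@bitSizeOf(usize)) == 1);
    try std.testing.expect(bitsetUsizes(@bitSizeOf(usize) + 1) == 2);
}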
57
58const Executable = struct {
59 /// Tracks the hit count for each pc as updated by the process's instrumentation.
60 pc_counters: []u8,
61
62 cache_f: std.fs.Dir,
    /// Shared copy of all pcs that have been hit, stored in a memory-mapped file that can be
    /// viewed while the fuzzer is running.
65 shared_seen_pcs: MemoryMappedList,
66 /// Hash of pcs used to uniquely identify the shared coverage file
67 pc_digest: u64,
68
    /// A minimal state for this struct that instrumentation functions can operate on.
    /// Used before this structure is initialized so that instrumentation functions called
    /// early do not cause illegal behavior by reading undefined values.
72 pub const preinit: Executable = .{
73 .pc_counters = undefined, // instrumentation works off the __sancov_cntrs section
74 .cache_f = undefined,
75 .shared_seen_pcs = undefined,
76 .pc_digest = undefined,
77 };
78
79 fn getCoverageFile(cache_dir: std.fs.Dir, pcs: []const usize, pc_digest: u64) MemoryMappedList {
80 const pc_bitset_usizes = bitsetUsizes(pcs.len);
81 const coverage_file_name = std.fmt.hex(pc_digest);
82 comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
83 comptime assert(abi.SeenPcsHeader.trailing[1] == .pc_addr);
84
85 var v = cache_dir.makeOpenPath("v", .{}) catch |e|
86 panic("failed to create directory 'v': {t}", .{e});
87 defer v.close();
88 const coverage_file, const populate = if (v.createFile(&coverage_file_name, .{
89 .read = true,
90 // If we create the file, we want to block other processes while we populate it
91 .lock = .exclusive,
92 .exclusive = true,
93 })) |f|
94 .{ f, true }
95 else |e| switch (e) {
96 error.PathAlreadyExists => .{ v.openFile(&coverage_file_name, .{
97 .mode = .read_write,
98 .lock = .shared,
99 }) catch |e2| panic(
100 "failed to open existing coverage file '{s}': {t}",
101 .{ &coverage_file_name, e2 },
102 ), false },
103 else => panic("failed to create coverage file '{s}': {t}", .{ &coverage_file_name, e }),
104 };
105
106 const coverage_file_len = @sizeOf(abi.SeenPcsHeader) +
107 pc_bitset_usizes * @sizeOf(usize) +
108 pcs.len * @sizeOf(usize);
109
110 if (populate) {
111 defer coverage_file.lock(.shared) catch |e| panic(
112 "failed to demote lock for coverage file '{s}': {t}",
113 .{ &coverage_file_name, e },
114 );
115 var map = MemoryMappedList.create(coverage_file, 0, coverage_file_len) catch |e| panic(
116 "failed to init memory map for coverage file '{s}': {t}",
117 .{ &coverage_file_name, e },
118 );
119 map.appendSliceAssumeCapacity(@ptrCast(&abi.SeenPcsHeader{
120 .n_runs = 0,
121 .unique_runs = 0,
122 .pcs_len = pcs.len,
123 }));
124 map.appendNTimesAssumeCapacity(0, pc_bitset_usizes * @sizeOf(usize));
125 // Relocations have been applied to `pcs` so it contains runtime addresses (with slide
126 // applied). We need to translate these to the virtual addresses as on disk.
127 for (pcs) |pc| {
128 const pc_vaddr = fuzzer_unslide_address(pc);
129 map.appendSliceAssumeCapacity(@ptrCast(&pc_vaddr));
130 }
131 return map;
132 } else {
133 const size = coverage_file.getEndPos() catch |e| panic(
134 "failed to stat coverage file '{s}': {t}",
135 .{ &coverage_file_name, e },
136 );
137 if (size != coverage_file_len) panic(
138 "incompatible existing coverage file '{s}' (differing lengths: {} != {})",
139 .{ &coverage_file_name, size, coverage_file_len },
140 );
141
142 const map = MemoryMappedList.init(
143 coverage_file,
144 coverage_file_len,
145 coverage_file_len,
146 ) catch |e| panic(
147 "failed to init memory map for coverage file '{s}': {t}",
148 .{ &coverage_file_name, e },
149 );
150
            const seen_pcs_header: *const abi.SeenPcsHeader = @ptrCast(@volatileCast(map.items.ptr));
152 if (seen_pcs_header.pcs_len != pcs.len) panic(
153 "incompatible existing coverage file '{s}' (differing pcs length: {} != {})",
154 .{ &coverage_file_name, seen_pcs_header.pcs_len, pcs.len },
155 );
            // `pcs` holds runtime (slid) addresses while the file stores unslid virtual
            // addresses, so unslide before comparing.
            for (seen_pcs_header.pcAddrs(), pcs, 0..) |stored_pc, pc, i| {
                if (stored_pc != fuzzer_unslide_address(pc)) panic(
                    "incompatible existing coverage file '{s}' (differing pc at index {d}: {x} != {x})",
                    .{ &coverage_file_name, i, stored_pc, fuzzer_unslide_address(pc) },
                );
            }
160
161 return map;
162 }
163 }
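
    // Coverage file layout assumed above (mirrors the `abi.SeenPcsHeader.trailing` asserts in
    // `getCoverageFile`; offsets are illustrative):
    //
    //   [0 .. @sizeOf(abi.SeenPcsHeader))              header: n_runs, unique_runs, pcs_len
    //   [.. + bitsetUsizes(pcs.len) * @sizeOf(usize))  bitset of pcs seen by any process
    //   [.. + pcs.len * @sizeOf(usize))                unslid virtual address of each pc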
164
165 pub fn init(cache_dir_path: []const u8) Executable {
166 var self: Executable = undefined;
167
168 const cache_dir = std.fs.cwd().makeOpenPath(cache_dir_path, .{}) catch |e| panic(
169 "failed to open directory '{s}': {t}",
170 .{ cache_dir_path, e },
171 );
172 log_f = cache_dir.createFile("tmp/libfuzzer.log", .{ .truncate = false }) catch |e|
173 panic("failed to create file 'tmp/libfuzzer.log': {t}", .{e});
174 self.cache_f = cache_dir.makeOpenPath("f", .{}) catch |e|
175 panic("failed to open directory 'f': {t}", .{e});
176
177 // Linkers are expected to automatically add symbols prefixed with these for the start and
178 // end of sections whose names are valid C identifiers.
179 const ofmt = builtin.object_format;
180 const section_start_prefix, const section_end_prefix = switch (ofmt) {
181 .elf => .{ "__start_", "__stop_" },
182 .macho => .{ "\x01section$start$__DATA$", "\x01section$end$__DATA$" },
183 else => @compileError("unsupported fuzzing object format '" ++ @tagName(ofmt) ++ "'"),
184 };
185
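        // On ELF, for example, the concatenations below resolve to the linker-provided
        // `__start___sancov_cntrs` / `__stop___sancov_cntrs` symbols delimiting the counters
        // section (illustrative; the Mach-O prefixes follow the same pattern).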
186 self.pc_counters = blk: {
187 const pc_counters_start_name = section_start_prefix ++ "__sancov_cntrs";
188 const pc_counters_start = @extern([*]u8, .{
189 .name = pc_counters_start_name,
190 .linkage = .weak,
191 }) orelse panic("missing {s} symbol", .{pc_counters_start_name});
192
193 const pc_counters_end_name = section_end_prefix ++ "__sancov_cntrs";
194 const pc_counters_end = @extern([*]u8, .{
195 .name = pc_counters_end_name,
196 .linkage = .weak,
197 }) orelse panic("missing {s} symbol", .{pc_counters_end_name});
198
199 break :blk pc_counters_start[0 .. pc_counters_end - pc_counters_start];
200 };
201
202 const pcs = blk: {
203 const pcs_start_name = section_start_prefix ++ "__sancov_pcs1";
204 const pcs_start = @extern([*]usize, .{
205 .name = pcs_start_name,
206 .linkage = .weak,
207 }) orelse panic("missing {s} symbol", .{pcs_start_name});
208
209 const pcs_end_name = section_end_prefix ++ "__sancov_pcs1";
210 const pcs_end = @extern([*]usize, .{
211 .name = pcs_end_name,
212 .linkage = .weak,
213 }) orelse panic("missing {s} symbol", .{pcs_end_name});
214
215 break :blk pcs_start[0 .. pcs_end - pcs_start];
216 };
217
218 if (self.pc_counters.len != pcs.len) panic(
219 "pc counters length and pcs length do not match ({} != {})",
220 .{ self.pc_counters.len, pcs.len },
221 );
222
223 self.pc_digest = digest: {
224 // Relocations have been applied to `pcs` so it contains runtime addresses (with slide
225 // applied). We need to translate these to the virtual addresses as on disk.
226 var h: std.hash.Wyhash = .init(0);
227 for (pcs) |pc| {
228 const pc_vaddr = fuzzer_unslide_address(pc);
229 h.update(@ptrCast(&pc_vaddr));
230 }
231 break :digest h.final();
232 };
233 self.shared_seen_pcs = getCoverageFile(cache_dir, pcs, self.pc_digest);
234
235 return self;
236 }
237
238 pub fn pcBitsetIterator(self: Executable) PcBitsetIterator {
239 return .{ .pc_counters = self.pc_counters };
240 }
241
    /// Iterates over `pc_counters`, yielding one `usize` bitset per chunk in which each bit
    /// indicates whether the corresponding counter has been hit.
243 pub const PcBitsetIterator = struct {
244 index: usize = 0,
245 pc_counters: []u8,
246
247 pub fn next(self: *PcBitsetIterator) usize {
248 const rest = self.pc_counters[self.index..];
249 if (rest.len >= @bitSizeOf(usize)) {
250 defer self.index += @bitSizeOf(usize);
251 const V = @Vector(@bitSizeOf(usize), u8);
252 return @as(usize, @bitCast(@as(V, @splat(0)) != rest[0..@bitSizeOf(usize)].*));
253 } else if (rest.len != 0) {
254 defer self.index += rest.len;
255 var res: usize = 0;
256 for (0.., rest) |bit_index, byte| {
257 res |= @shlExact(@as(usize, @intFromBool(byte != 0)), @intCast(bit_index));
258 }
259 return res;
260 } else unreachable;
261 }
262 };
263};
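
// Illustrative, hedged sketch of how `Executable.PcBitsetIterator` condenses 8-bit counters
// into one hit bit per pc; assumes at least `@bitSizeOf(usize)` counters so the vector path
// is taken.
test "PcBitsetIterator chunking (illustrative)" {
    var counters = [_]u8{0} ** @bitSizeOf(usize);
    counters[0] = 1;
    counters[3] = 7;
    var it: Executable.PcBitsetIterator = .{ .pc_counters = &counters };
    try std.testing.expect(it.next() == 0b1001);
}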
264
/// Data gathered from instrumentation functions.
/// Separate from Executable since its state is resettable and changes over time.
/// Separate from Fuzzer since it may be needed before fuzzing starts.
268const Instrumentation = struct {
    /// Bitset of seen pcs across all runs, excluding fresh pcs.
    /// This is kept separate from shared_seen_pcs because other fuzzing processes are likely
    /// using that file as well, which causes contention and sets pcs unrelated to our campaign.
272 seen_pcs: []usize,
273
274 /// Stores a fresh input's new pcs
275 fresh_pcs: []usize,
276
    /// Pcs from which __sanitizer_cov_trace_switch and __sanitizer_cov_trace_const_cmpX have
    /// been called and whose constant operands have already been added to const_valsX.
279 const_pcs: std.AutoArrayHashMapUnmanaged(usize, void) = .empty,
    /// Values that have been constant operands in comparisons and switch cases.
    /// There may be duplicates in these arrays if they came from different addresses; that is
    /// fine, as such values are likely more important and hence more likely to be selected.
283 const_vals2: std.ArrayList(u16) = .empty,
284 const_vals4: std.ArrayList(u32) = .empty,
285 const_vals8: std.ArrayList(u64) = .empty,
286 const_vals16: std.ArrayList(u128) = .empty,
287
    /// A minimal state for this struct that instrumentation functions can operate on.
    /// Used before this structure is initialized so that instrumentation functions called
    /// early do not cause illegal behavior by reading undefined values.
291 pub const preinit: Instrumentation = .{
292 .seen_pcs = undefined, // currently only updated by `Fuzzer`
293 .fresh_pcs = undefined,
294 };
295
296 pub fn depreinit(self: *Instrumentation) void {
297 self.const_vals2.deinit(gpa);
298 self.const_vals4.deinit(gpa);
299 self.const_vals8.deinit(gpa);
300 self.const_vals16.deinit(gpa);
301 self.* = undefined;
302 }
303
304 pub fn init() Instrumentation {
305 const pc_bitset_usizes = bitsetUsizes(exec.pc_counters.len);
306 const alloc_usizes = pc_bitset_usizes * 2;
307 const buf = gpa.alloc(u8, alloc_usizes * @sizeOf(usize)) catch @panic("OOM");
308 var fba_ctx: std.heap.FixedBufferAllocator = .init(buf);
309 const fba = fba_ctx.allocator();
310
311 var self: Instrumentation = .{
312 .seen_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable,
313 .fresh_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable,
314 };
315 self.reset();
316 return self;
317 }
318
319 pub fn reset(self: *Instrumentation) void {
320 @memset(self.seen_pcs, 0);
321 @memset(self.fresh_pcs, 0);
322 self.const_pcs.clearRetainingCapacity();
323 self.const_vals2.clearRetainingCapacity();
324 self.const_vals4.clearRetainingCapacity();
325 self.const_vals8.clearRetainingCapacity();
326 self.const_vals16.clearRetainingCapacity();
327 }
328
    /// Returns whether the pc has already been seen; if not, it is marked as seen.
330 pub fn constPcSeen(self: *Instrumentation, pc: usize) bool {
331 return (self.const_pcs.getOrPut(gpa, pc) catch @panic("OOM")).found_existing;
332 }
333
334 pub fn isFresh(self: *Instrumentation) bool {
335 var hit_pcs = exec.pcBitsetIterator();
336 for (self.seen_pcs) |seen_pcs| {
337 if (hit_pcs.next() & ~seen_pcs != 0) return true;
338 }
339
340 return false;
341 }
342
343 /// Updates `fresh_pcs`
344 pub fn setFresh(self: *Instrumentation) void {
345 var hit_pcs = exec.pcBitsetIterator();
346 for (self.seen_pcs, self.fresh_pcs) |seen_pcs, *fresh_pcs| {
347 fresh_pcs.* = hit_pcs.next() & ~seen_pcs;
348 }
349 }
350
    /// Returns whether `exec.pc_counters` is a superset of `fresh_pcs`.
352 pub fn atleastFresh(self: *Instrumentation) bool {
353 var hit_pcs = exec.pcBitsetIterator();
354 for (self.fresh_pcs) |fresh_pcs| {
355 if (fresh_pcs & hit_pcs.next() != fresh_pcs) return false;
356 }
357 return true;
358 }
359
    /// Updates the local and shared seen-pc sets based on `fresh_pcs`.
361 fn updateSeen(self: *Instrumentation) void {
362 comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
363 const shared_seen_pcs: [*]volatile usize = @ptrCast(
364 exec.shared_seen_pcs.items[@sizeOf(abi.SeenPcsHeader)..].ptr,
365 );
366
367 for (self.seen_pcs, shared_seen_pcs, self.fresh_pcs) |*seen, *shared_seen, fresh| {
368 seen.* |= fresh;
369 if (fresh != 0)
370 _ = @atomicRmw(usize, shared_seen, .Or, fresh, .monotonic);
371 }
372 }
373};
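
// Worked example of the bit algebra above (illustrative): with seen = 0b0011 and a run whose
// counters hit 0b0110, `isFresh` is true because 0b0110 & ~0b0011 == 0b0100; `setFresh` then
// records fresh = 0b0100 and `updateSeen` ORs it back in, leaving seen = 0b0111.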
374
375const Fuzzer = struct {
376 arena_ctx: std.heap.ArenaAllocator = .init(gpa),
377 rng: std.Random.DefaultPrng = .init(0),
378 test_one: abi.TestOne,
379 /// The next input that will be given to the testOne function. When the
380 /// current process crashes, this memory-mapped file is used to recover the
381 /// input.
382 input: MemoryMappedList,
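
    // Assumed layout of the mapped `in` file, as written by `run` and read back in `init`:
    //
    //   bytes [0..8)        input length as a little-endian usize
    //   bytes [8..8 + len)  the input itself
    //
    // so a crashed run's input can be recovered from disk afterwards.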
383
    /// Minimized past inputs leading to new pc hits.
    /// These are randomly mutated in round-robin fashion.
    /// Element zero is always an empty input. It is guaranteed that no other elements are empty.
387 corpus: std.ArrayList([]const u8),
388 corpus_pos: usize,
389 /// List of past mutations that have led to new inputs. This way, the mutations that are the
390 /// most effective are the most likely to be selected again. Starts with one of each mutation.
391 mutations: std.ArrayList(Mutation) = .empty,
392
393 /// Filesystem directory containing found inputs for future runs
394 corpus_dir: std.fs.Dir,
395 corpus_dir_idx: usize = 0,
396
397 pub fn init(test_one: abi.TestOne, unit_test_name: []const u8) Fuzzer {
398 var self: Fuzzer = .{
399 .test_one = test_one,
400 .input = undefined,
401 .corpus = .empty,
402 .corpus_pos = 0,
403 .mutations = .empty,
404 .corpus_dir = undefined,
405 };
406 const arena = self.arena_ctx.allocator();
407
408 self.corpus_dir = exec.cache_f.makeOpenPath(unit_test_name, .{}) catch |e|
409 panic("failed to open directory '{s}': {t}", .{ unit_test_name, e });
410 self.input = in: {
411 const f = self.corpus_dir.createFile("in", .{
412 .read = true,
413 .truncate = false,
                // In case any other fuzz tests are running under the same test name,
                // the input file is exclusively locked to ensure only one proceeds.
416 .lock = .exclusive,
417 .lock_nonblocking = true,
418 }) catch |e| switch (e) {
419 error.WouldBlock => @panic("input file 'in' is in use by another fuzzing process"),
420 else => panic("failed to create input file 'in': {t}", .{e}),
421 };
422 const size = f.getEndPos() catch |e| panic("failed to stat input file 'in': {t}", .{e});
423 const map = (if (size < std.heap.page_size_max)
424 MemoryMappedList.create(f, 8, std.heap.page_size_max)
425 else
426 MemoryMappedList.init(f, size, size)) catch |e|
427 panic("failed to memory map input file 'in': {t}", .{e});
428
429 // Perform a dry-run of the stored input if there was one in case it might reproduce a
430 // crash.
431 const old_in_len = mem.littleToNative(usize, mem.bytesAsValue(usize, map.items[0..8]).*);
            if (size >= 8 and old_in_len != 0 and old_in_len <= map.items.len - 8) {
433 test_one(.fromSlice(@volatileCast(map.items[8..][0..old_in_len])));
434 }
435
436 break :in map;
437 };
438 inst.reset();
439
440 self.mutations.appendSlice(gpa, std.meta.tags(Mutation)) catch @panic("OOM");
441 // Ensure there is never an empty corpus. Additionally, an empty input usually leads to
442 // new inputs.
443 self.addInput(&.{});
444
445 while (true) {
446 var name_buf: [@sizeOf(usize) * 2]u8 = undefined;
447 const bytes = self.corpus_dir.readFileAlloc(
448 std.fmt.bufPrint(&name_buf, "{x}", .{self.corpus_dir_idx}) catch unreachable,
449 arena,
450 .unlimited,
451 ) catch |e| switch (e) {
452 error.FileNotFound => break,
453 else => panic("failed to read corpus file '{x}': {t}", .{ self.corpus_dir_idx, e }),
454 };
455 // No corpus file of length zero will ever be created
456 if (bytes.len == 0)
457 panic("corrupt corpus file '{x}' (len of zero)", .{self.corpus_dir_idx});
458 self.addInput(bytes);
459 self.corpus_dir_idx += 1;
460 }
461
462 return self;
463 }
464
465 pub fn deinit(self: *Fuzzer) void {
466 self.input.deinit();
467 self.corpus.deinit(gpa);
468 self.mutations.deinit(gpa);
469 self.corpus_dir.close();
470 self.arena_ctx.deinit();
471 self.* = undefined;
472 }
473
474 pub fn addInput(self: *Fuzzer, bytes: []const u8) void {
475 self.corpus.append(gpa, bytes) catch @panic("OOM");
476 self.input.clearRetainingCapacity();
477 self.input.ensureTotalCapacity(8 + bytes.len) catch |e|
478 panic("could not resize shared input file: {t}", .{e});
479 self.input.items.len = 8;
480 self.input.appendSliceAssumeCapacity(bytes);
481 self.run();
482 inst.setFresh();
483 inst.updateSeen();
484 }
485
    /// Assumes `fresh_pcs` corresponds to the current input.
487 fn minimizeInput(self: *Fuzzer) void {
        // The minimization technique is kept relatively simple: we sequentially try to remove
        // each byte and check that the new pcs and memory loads are still hit.
490 var i = self.input.items.len;
491 while (i != 8) {
492 i -= 1;
493 const old = self.input.orderedRemove(i);
494
495 @memset(exec.pc_counters, 0);
496 self.run();
497
498 if (!inst.atleastFresh()) {
499 self.input.insertAssumeCapacity(i, old);
500 } else {
501 // This removal may have led to new pcs or memory loads being hit, so we need to
502 // update them to avoid duplicates.
503 inst.setFresh();
504 }
505 }
506 }
507
508 fn run(self: *Fuzzer) void {
509 // `pc_counters` is not cleared since only new hits are relevant.
510
511 mem.bytesAsValue(usize, self.input.items[0..8]).* =
512 mem.nativeToLittle(usize, self.input.items.len - 8);
513 self.test_one(.fromSlice(@volatileCast(self.input.items[8..])));
514
515 const header = mem.bytesAsValue(
516 abi.SeenPcsHeader,
517 exec.shared_seen_pcs.items[0..@sizeOf(abi.SeenPcsHeader)],
518 );
519 _ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic);
520 }
521
522 pub fn cycle(self: *Fuzzer) void {
523 const input = self.corpus.items[self.corpus_pos];
524 self.corpus_pos += 1;
525 if (self.corpus_pos == self.corpus.items.len)
526 self.corpus_pos = 0;
527
528 const rng = self.rng.random();
529 const m = while (true) {
530 const m = self.mutations.items[rng.uintLessThanBiased(usize, self.mutations.items.len)];
531 if (!m.mutate(
532 rng,
533 input,
534 &self.input,
535 self.corpus.items,
536 inst.const_vals2.items,
537 inst.const_vals4.items,
538 inst.const_vals8.items,
539 inst.const_vals16.items,
540 )) continue;
541 break m;
542 };
543
544 self.run();
545
546 if (inst.isFresh()) {
547 @branchHint(.unlikely);
548
549 const header = mem.bytesAsValue(
550 abi.SeenPcsHeader,
551 exec.shared_seen_pcs.items[0..@sizeOf(abi.SeenPcsHeader)],
552 );
553 _ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic);
554
555 inst.setFresh();
556 self.minimizeInput();
557 inst.updateSeen();
558
            // An empty input has always been tried, so if an empty input is fresh then the
            // test has to be non-deterministic. This has to be checked because duplicate empty
            // entries are not allowed in the corpus.
562 if (self.input.items.len - 8 == 0) {
563 std.log.warn("non-deterministic test (empty input produces different hits)", .{});
564 _ = @atomicRmw(usize, &header.unique_runs, .Sub, 1, .monotonic);
565 return;
566 }
567
568 const arena = self.arena_ctx.allocator();
569 const bytes = arena.dupe(u8, @volatileCast(self.input.items[8..])) catch @panic("OOM");
570
571 self.corpus.append(gpa, bytes) catch @panic("OOM");
572 self.mutations.appendNTimes(gpa, m, 6) catch @panic("OOM");
573
574 // Write new corpus to cache
575 var name_buf: [@sizeOf(usize) * 2]u8 = undefined;
576 self.corpus_dir.writeFile(.{
577 .sub_path = std.fmt.bufPrint(
578 &name_buf,
579 "{x}",
580 .{self.corpus_dir_idx},
581 ) catch unreachable,
582 .data = bytes,
583 }) catch |e| panic(
584 "failed to write corpus file '{x}': {t}",
585 .{ self.corpus_dir_idx, e },
586 );
587 self.corpus_dir_idx += 1;
588 }
589 }
590};
591
592/// Instrumentation must not be triggered before this function is called
593export fn fuzzer_init(cache_dir_path: abi.Slice) void {
594 inst.depreinit();
595 exec = .init(cache_dir_path.toSlice());
596 inst = .init();
597}
598
599/// Invalid until `fuzzer_init` is called.
600export fn fuzzer_coverage() abi.Coverage {
601 const coverage_id = exec.pc_digest;
602 const header: *const abi.SeenPcsHeader = @ptrCast(@volatileCast(exec.shared_seen_pcs.items.ptr));
603
604 var seen_count: usize = 0;
605 for (header.seenBits()) |chunk| {
606 seen_count += @popCount(chunk);
607 }
608
609 return .{
610 .id = coverage_id,
611 .runs = header.n_runs,
612 .unique = header.unique_runs,
613 .seen = seen_count,
614 };
615}
616
617/// fuzzer_init must be called beforehand
618export fn fuzzer_init_test(test_one: abi.TestOne, unit_test_name: abi.Slice) void {
619 current_test_name = unit_test_name.toSlice();
620 fuzzer = .init(test_one, unit_test_name.toSlice());
621}
622
/// fuzzer_init_test must be called beforehand.
/// The caller owns the memory of `bytes` and must not free it until the fuzzer is finished.
625export fn fuzzer_new_input(bytes: abi.Slice) void {
626 // An entry of length zero is always added and duplicates of it are not allowed.
627 if (bytes.len != 0)
628 fuzzer.addInput(bytes.toSlice());
629}
630
631/// fuzzer_init_test must be called first
632export fn fuzzer_main(limit_kind: abi.LimitKind, amount: u64) void {
633 switch (limit_kind) {
634 .forever => while (true) fuzzer.cycle(),
635 .iterations => for (0..amount) |_| fuzzer.cycle(),
636 }
637}
638
639export fn fuzzer_unslide_address(addr: usize) usize {
    const si = std.debug.getSelfDebugInfo() catch |err|
        std.debug.panic("failed to get self debug info: {t}", .{err});
641 const slide = si.getModuleSlide(std.debug.getDebugInfoAllocator(), addr) catch |err| {
642 std.debug.panic("failed to find virtual address slide: {t}", .{err});
643 };
644 return addr - slide;
645}
646
647/// Helps determine run uniqueness in the face of recursion.
648/// Currently not used by the fuzzer.
649export threadlocal var __sancov_lowest_stack: usize = 0;
650
/// Inlined so that `@returnAddress` resolves to the sanitizer hook's return address,
/// i.e. the pc of the instrumented comparison.
inline fn genericConstCmp(comptime T: type, val: T, comptime const_vals_field: []const u8) void {
653 if (!inst.constPcSeen(@returnAddress())) {
654 @branchHint(.unlikely);
655 @field(inst, const_vals_field).append(gpa, val) catch @panic("OOM");
656 }
657}
658
659export fn __sanitizer_cov_trace_const_cmp1(const_arg: u8, arg: u8) void {
660 _ = const_arg;
661 _ = arg;
662}
663
664export fn __sanitizer_cov_trace_const_cmp2(const_arg: u16, arg: u16) void {
665 _ = arg;
666 genericConstCmp(u16, const_arg, "const_vals2");
667}
668
669export fn __sanitizer_cov_trace_const_cmp4(const_arg: u32, arg: u32) void {
670 _ = arg;
671 genericConstCmp(u32, const_arg, "const_vals4");
672}
673
674export fn __sanitizer_cov_trace_const_cmp8(const_arg: u64, arg: u64) void {
675 _ = arg;
676 genericConstCmp(u64, const_arg, "const_vals8");
677}
678
679export fn __sanitizer_cov_trace_switch(val: u64, cases: [*]const u64) void {
680 _ = val;
681 if (!inst.constPcSeen(@returnAddress())) {
682 @branchHint(.unlikely);
683 const case_bits = cases[1];
684 const cases_slice = cases[2..][0..cases[0]];
685 switch (case_bits) {
            // Cases of 8 bits or fewer are ignored because random byte mutations are
            // likely to produce them anyway.
687 0...8 => {},
688 9...16 => for (cases_slice) |c|
689 inst.const_vals2.append(gpa, @truncate(c)) catch @panic("OOM"),
690 17...32 => for (cases_slice) |c|
691 inst.const_vals4.append(gpa, @truncate(c)) catch @panic("OOM"),
692 33...64 => for (cases_slice) |c|
693 inst.const_vals8.append(gpa, @truncate(c)) catch @panic("OOM"),
694 else => {}, // Should be impossible
695 }
696 }
697}
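
// Illustrative note: the constants harvested by the `__sanitizer_cov_trace_const_cmp*` and
// `__sanitizer_cov_trace_switch` hooks above feed the `set_const_*`, `insert_const_*`, and
// `push_const_*` mutations, so literals compared against in the code under test can be
// reproduced verbatim in generated inputs.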
698
699export fn __sanitizer_cov_trace_cmp1(arg1: u8, arg2: u8) void {
700 _ = arg1;
701 _ = arg2;
702}
703
704export fn __sanitizer_cov_trace_cmp2(arg1: u16, arg2: u16) void {
705 _ = arg1;
706 _ = arg2;
707}
708
709export fn __sanitizer_cov_trace_cmp4(arg1: u32, arg2: u32) void {
710 _ = arg1;
711 _ = arg2;
712}
713
714export fn __sanitizer_cov_trace_cmp8(arg1: u64, arg2: u64) void {
715 _ = arg1;
716 _ = arg2;
717}
718
719export fn __sanitizer_cov_trace_pc_indir(callee: usize) void {
    // Not valuable because we already have pc tracing via 8-bit counters.
721 _ = callee;
722}
723export fn __sanitizer_cov_8bit_counters_init(start: usize, end: usize) void {
724 // clang will emit a call to this function when compiling with code coverage instrumentation.
725 // however, fuzzer_init() does not need this information since it directly reads from the
726 // symbol table.
727 _ = start;
728 _ = end;
729}
730export fn __sanitizer_cov_pcs_init(start: usize, end: usize) void {
731 // clang will emit a call to this function when compiling with code coverage instrumentation.
732 // however, fuzzer_init() does not need this information since it directly reads from the
733 // symbol table.
734 _ = start;
735 _ = end;
736}
737
738/// Copy all of source into dest at position 0.
739/// If the slices overlap, dest.ptr must be <= src.ptr.
740fn volatileCopyForwards(comptime T: type, dest: []volatile T, source: []const volatile T) void {
741 for (dest, source) |*d, s| d.* = s;
742}
743
744/// Copy all of source into dest at position 0.
745/// If the slices overlap, dest.ptr must be >= src.ptr.
746fn volatileCopyBackwards(comptime T: type, dest: []volatile T, source: []const volatile T) void {
747 var i = source.len;
748 while (i > 0) {
749 i -= 1;
750 dest[i] = source[i];
751 }
752}
753
754const Mutation = enum {
    /// Applies to .insert_*_span and .push_*_span.
    /// For WTF-8, this limits code units, not code points.
    const max_insert_len = 12;
    /// Applies to .insert_large_*_span and .push_large_*_span.
    /// 4096 is used as it is a common sector size.
    const max_large_insert_len = 4096;
    /// Applies to .delete_span and .pop_span.
    const max_delete_len = 16;
    /// Applies to .set_*_span, .move_span, and .set_existing_span.
    const max_set_len = 12;
765 const max_replicate_len = 64;
766 const AddValue = i6;
767 const SmallValue = i10;
768
769 delete_byte,
770 delete_span,
771 /// Removes the last byte from the input
772 pop_byte,
773 pop_span,
774 /// Inserts a group of bytes which is already in the input and removes the original copy.
775 move_span,
776 /// Replaces a group of bytes in the input with another group of bytes in the input
777 set_existing_span,
778 insert_existing_span,
779 push_existing_span,
780 set_rng_byte,
781 set_rng_span,
782 insert_rng_byte,
783 insert_rng_span,
784 /// Adds a byte to the end of the input
785 push_rng_byte,
786 push_rng_span,
787 set_zero_byte,
788 set_zero_span,
789 insert_zero_byte,
790 insert_zero_span,
791 push_zero_byte,
792 push_zero_span,
    /// Appends a large run of zeros to the end of the input.
    /// This is intended to work with fuzz tests that require data in (large) blocks.
    push_large_zero_span,
    /// Inserts a group of ASCII printable characters.
    insert_print_span,
    /// Inserts a group of characters from a...z, A...Z, 0...9, _, and ' '.
    insert_common_span,
    /// Inserts a group of ASCII digits, possibly preceded by a `-`.
    insert_integer,
    /// Code unit lengths are evenly distributed between one and four.
    insert_wtf8_char,
    insert_wtf8_span,
    /// Inserts a group of bytes from another input.
    insert_splice_span,
    // utf16 is not yet included since insertion of random bytes should adequately check
    // BMP characters, surrogate handling, and occasionally characters outside of the BMP.
    set_print_span,
    set_common_span,
    set_splice_span,
    /// Similar to set_splice_span, but the bytes are copied to the same index instead of a
    /// random one.
813 replicate_splice_span,
814 push_print_span,
815 push_common_span,
816 push_integer,
817 push_wtf8_char,
818 push_wtf8_span,
819 push_splice_span,
    /// Clears a random number of high bits of a byte.
821 truncate_8,
822 truncate_16le,
823 truncate_16be,
824 truncate_32le,
825 truncate_32be,
826 truncate_64le,
827 truncate_64be,
828 /// Flips a random bit
829 xor_1,
    /// Flips up to three bits of a byte, biased toward fewer bits.
    xor_few_8,
    /// Flips up to six bits of a 16-bit value, biased toward fewer bits.
    xor_few_16,
    /// Flips up to nine bits of a 32-bit value, biased toward fewer bits.
    xor_few_32,
    /// Flips up to twelve bits of a 64-bit value, biased toward fewer bits.
    xor_few_64,
    /// Adds a value of type AddValue to a byte.
839 add_8,
840 add_16le,
841 add_16be,
842 add_32le,
843 add_32be,
844 add_64le,
845 add_64be,
846 /// Sets a 16-bit little-endian value to a value of type SmallValue
847 set_small_16le,
848 set_small_16be,
849 set_small_32le,
850 set_small_32be,
851 set_small_64le,
852 set_small_64be,
853 insert_small_16le,
854 insert_small_16be,
855 insert_small_32le,
856 insert_small_32be,
857 insert_small_64le,
858 insert_small_64be,
859 push_small_16le,
860 push_small_16be,
861 push_small_32le,
862 push_small_32be,
863 push_small_64le,
864 push_small_64be,
865 set_const_16,
866 set_const_32,
867 set_const_64,
868 set_const_128,
869 insert_const_16,
870 insert_const_32,
871 insert_const_64,
872 insert_const_128,
873 push_const_16,
874 push_const_32,
875 push_const_64,
876 push_const_128,
    /// Sets a byte with up to three bits set, biased toward fewer bits.
    set_few_8,
    /// Sets a 16-bit value with up to six bits set, biased toward fewer bits.
    set_few_16,
    /// Sets a 32-bit value with up to nine bits set, biased toward fewer bits.
    set_few_32,
    /// Sets a 64-bit value with up to twelve bits set, biased toward fewer bits.
    set_few_64,
885 insert_few_8,
886 insert_few_16,
887 insert_few_32,
888 insert_few_64,
889 push_few_8,
890 push_few_16,
891 push_few_32,
892 push_few_64,
    /// Randomizes a random contiguous group of bits in a byte.
894 packed_set_rng_8,
895 packed_set_rng_16le,
896 packed_set_rng_16be,
897 packed_set_rng_32le,
898 packed_set_rng_32be,
899 packed_set_rng_64le,
900 packed_set_rng_64be,
901
902 fn fewValue(rng: std.Random, T: type, comptime bits: u16) T {
903 var result: T = 0;
904 var remaining_bits = rng.intRangeAtMostBiased(u16, 1, bits);
905 while (remaining_bits > 0) {
906 result |= @shlExact(@as(T, 1), rng.int(math.Log2Int(T)));
907 remaining_bits -= 1;
908 }
909 return result;
910 }
911
    /// Returns whether the mutation was applicable to the input.
913 pub fn mutate(
914 mutation: Mutation,
915 rng: std.Random,
916 in: []const u8,
917 out: *MemoryMappedList,
918 corpus: []const []const u8,
919 const_vals2: []const u16,
920 const_vals4: []const u32,
921 const_vals8: []const u64,
922 const_vals16: []const u128,
923 ) bool {
924 out.clearRetainingCapacity();
925 const new_capacity = 8 + in.len + @max(
            16, // 128-bit constant value
927 Mutation.max_insert_len,
928 Mutation.max_large_insert_len,
929 );
930 out.ensureTotalCapacity(new_capacity) catch |e|
931 panic("could not resize shared input file: {t}", .{e});
932 out.items.len = 8; // Length field
933
934 const applied = switch (mutation) {
935 inline else => |m| m.comptimeMutate(
936 rng,
937 in,
938 out,
939 corpus,
940 const_vals2,
941 const_vals4,
942 const_vals8,
943 const_vals16,
944 ),
945 };
946 if (!applied)
947 assert(out.items.len == 8)
948 else
949 assert(out.items.len <= new_capacity);
950 return applied;
951 }
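
    // Hedged usage sketch (mirrors the retry loop in `Fuzzer.cycle`; the names here are
    // illustrative): callers keep drawing mutations until one reports that it applied.
    //
    //   const m = while (true) {
    //       const m = mutations[rng.uintLessThanBiased(usize, mutations.len)];
    //       if (m.mutate(rng, in, &out, corpus, cv2, cv4, cv8, cv16)) break m;
    //   };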
952
953 /// Assumes out has already been cleared
954 fn comptimeMutate(
955 comptime mutation: Mutation,
956 rng: std.Random,
957 in: []const u8,
958 out: *MemoryMappedList,
959 corpus: []const []const u8,
960 const_vals2: []const u16,
961 const_vals4: []const u32,
962 const_vals8: []const u64,
963 const_vals16: []const u128,
964 ) bool {
965 const Class = enum { new, remove, rmw, move_span, replicate_splice_span };
966 const class: Class, const class_ctx = switch (mutation) {
967 // zig fmt: off
968 .move_span => .{ .move_span, null },
969 .replicate_splice_span => .{ .replicate_splice_span, null },
970
971 .delete_byte => .{ .remove, .{ .delete, 1 } },
972 .delete_span => .{ .remove, .{ .delete, max_delete_len } },
973
974 .pop_byte => .{ .remove, .{ .pop, 1 } },
975 .pop_span => .{ .remove, .{ .pop, max_delete_len } },
976
977 .set_rng_byte => .{ .new, .{ .set , 1, .rng , .one } },
978 .set_zero_byte => .{ .new, .{ .set , 1, .zero , .one } },
979 .set_rng_span => .{ .new, .{ .set , 1, .rng , .many } },
980 .set_zero_span => .{ .new, .{ .set , 1, .zero , .many } },
981 .set_common_span => .{ .new, .{ .set , 1, .common , .many } },
982 .set_print_span => .{ .new, .{ .set , 1, .print , .many } },
983 .set_existing_span => .{ .new, .{ .set , 2, .existing, .many } },
984 .set_splice_span => .{ .new, .{ .set , 1, .splice , .many } },
985 .set_const_16 => .{ .new, .{ .set , 2, .@"const", const_vals2 } },
986 .set_const_32 => .{ .new, .{ .set , 4, .@"const", const_vals4 } },
987 .set_const_64 => .{ .new, .{ .set , 8, .@"const", const_vals8 } },
988 .set_const_128 => .{ .new, .{ .set , 16, .@"const", const_vals16 } },
989 .set_small_16le => .{ .new, .{ .set , 2, .small , .{ i16, .little } } },
990 .set_small_32le => .{ .new, .{ .set , 4, .small , .{ i32, .little } } },
991 .set_small_64le => .{ .new, .{ .set , 8, .small , .{ i64, .little } } },
992 .set_small_16be => .{ .new, .{ .set , 2, .small , .{ i16, .big } } },
993 .set_small_32be => .{ .new, .{ .set , 4, .small , .{ i32, .big } } },
994 .set_small_64be => .{ .new, .{ .set , 8, .small , .{ i64, .big } } },
995 .set_few_8 => .{ .new, .{ .set , 1, .few , .{ u8 , 3 } } },
996 .set_few_16 => .{ .new, .{ .set , 2, .few , .{ u16, 6 } } },
997 .set_few_32 => .{ .new, .{ .set , 4, .few , .{ u32, 9 } } },
998 .set_few_64 => .{ .new, .{ .set , 8, .few , .{ u64, 12 } } },
999
1000 .insert_rng_byte => .{ .new, .{ .insert, 0, .rng , .one } },
1001 .insert_zero_byte => .{ .new, .{ .insert, 0, .zero , .one } },
1002 .insert_rng_span => .{ .new, .{ .insert, 0, .rng , .many } },
1003 .insert_zero_span => .{ .new, .{ .insert, 0, .zero , .many } },
1004 .insert_print_span => .{ .new, .{ .insert, 0, .print , .many } },
1005 .insert_common_span => .{ .new, .{ .insert, 0, .common , .many } },
1006 .insert_integer => .{ .new, .{ .insert, 0, .integer , .many } },
1007 .insert_wtf8_char => .{ .new, .{ .insert, 0, .wtf8 , .one } },
1008 .insert_wtf8_span => .{ .new, .{ .insert, 0, .wtf8 , .many } },
1009 .insert_existing_span => .{ .new, .{ .insert, 1, .existing, .many } },
1010 .insert_splice_span => .{ .new, .{ .insert, 0, .splice , .many } },
1011 .insert_const_16 => .{ .new, .{ .insert, 0, .@"const", const_vals2 } },
1012 .insert_const_32 => .{ .new, .{ .insert, 0, .@"const", const_vals4 } },
1013 .insert_const_64 => .{ .new, .{ .insert, 0, .@"const", const_vals8 } },
1014 .insert_const_128 => .{ .new, .{ .insert, 0, .@"const", const_vals16 } },
1015 .insert_small_16le => .{ .new, .{ .insert, 0, .small , .{ i16, .little } } },
1016 .insert_small_32le => .{ .new, .{ .insert, 0, .small , .{ i32, .little } } },
1017 .insert_small_64le => .{ .new, .{ .insert, 0, .small , .{ i64, .little } } },
1018 .insert_small_16be => .{ .new, .{ .insert, 0, .small , .{ i16, .big } } },
1019 .insert_small_32be => .{ .new, .{ .insert, 0, .small , .{ i32, .big } } },
1020 .insert_small_64be => .{ .new, .{ .insert, 0, .small , .{ i64, .big } } },
1021 .insert_few_8 => .{ .new, .{ .insert, 0, .few , .{ u8 , 3 } } },
1022 .insert_few_16 => .{ .new, .{ .insert, 0, .few , .{ u16, 6 } } },
1023 .insert_few_32 => .{ .new, .{ .insert, 0, .few , .{ u32, 9 } } },
1024 .insert_few_64 => .{ .new, .{ .insert, 0, .few , .{ u64, 12 } } },
1025
1026 .push_rng_byte => .{ .new, .{ .push , 0, .rng , .one } },
1027 .push_zero_byte => .{ .new, .{ .push , 0, .zero , .one } },
1028 .push_rng_span => .{ .new, .{ .push , 0, .rng , .many } },
1029 .push_zero_span => .{ .new, .{ .push , 0, .zero , .many } },
1030 .push_print_span => .{ .new, .{ .push , 0, .print , .many } },
1031 .push_common_span => .{ .new, .{ .push , 0, .common , .many } },
1032 .push_integer => .{ .new, .{ .push , 0, .integer , .many } },
1033 .push_large_zero_span => .{ .new, .{ .push , 0, .zero , .large } },
1034 .push_wtf8_char => .{ .new, .{ .push , 0, .wtf8 , .one } },
1035 .push_wtf8_span => .{ .new, .{ .push , 0, .wtf8 , .many } },
1036 .push_existing_span => .{ .new, .{ .push , 1, .existing, .many } },
1037 .push_splice_span => .{ .new, .{ .push , 0, .splice , .many } },
1038 .push_const_16 => .{ .new, .{ .push , 0, .@"const", const_vals2 } },
1039 .push_const_32 => .{ .new, .{ .push , 0, .@"const", const_vals4 } },
1040 .push_const_64 => .{ .new, .{ .push , 0, .@"const", const_vals8 } },
1041 .push_const_128 => .{ .new, .{ .push , 0, .@"const", const_vals16 } },
1042 .push_small_16le => .{ .new, .{ .push , 0, .small , .{ i16, .little } } },
1043 .push_small_32le => .{ .new, .{ .push , 0, .small , .{ i32, .little } } },
1044 .push_small_64le => .{ .new, .{ .push , 0, .small , .{ i64, .little } } },
1045 .push_small_16be => .{ .new, .{ .push , 0, .small , .{ i16, .big } } },
1046 .push_small_32be => .{ .new, .{ .push , 0, .small , .{ i32, .big } } },
1047 .push_small_64be => .{ .new, .{ .push , 0, .small , .{ i64, .big } } },
1048 .push_few_8 => .{ .new, .{ .push , 0, .few , .{ u8 , 3 } } },
1049 .push_few_16 => .{ .new, .{ .push , 0, .few , .{ u16, 6 } } },
1050 .push_few_32 => .{ .new, .{ .push , 0, .few , .{ u32, 9 } } },
1051 .push_few_64 => .{ .new, .{ .push , 0, .few , .{ u64, 12 } } },
1052
1053 .xor_1 => .{ .rmw, .{ .xor , u8 , native_endian, 1 } },
1054 .xor_few_8 => .{ .rmw, .{ .xor , u8 , native_endian, 3 } },
1055 .xor_few_16 => .{ .rmw, .{ .xor , u16, native_endian, 6 } },
1056 .xor_few_32 => .{ .rmw, .{ .xor , u32, native_endian, 9 } },
1057 .xor_few_64 => .{ .rmw, .{ .xor , u64, native_endian, 12 } },
1058
1059 .truncate_8 => .{ .rmw, .{ .truncate , u8 , native_endian, {} } },
1060 .truncate_16le => .{ .rmw, .{ .truncate , u16, .little , {} } },
1061 .truncate_32le => .{ .rmw, .{ .truncate , u32, .little , {} } },
1062 .truncate_64le => .{ .rmw, .{ .truncate , u64, .little , {} } },
1063 .truncate_16be => .{ .rmw, .{ .truncate , u16, .big , {} } },
1064 .truncate_32be => .{ .rmw, .{ .truncate , u32, .big , {} } },
1065 .truncate_64be => .{ .rmw, .{ .truncate , u64, .big , {} } },
1066
1067 .add_8 => .{ .rmw, .{ .add , i8 , native_endian, {} } },
1068 .add_16le => .{ .rmw, .{ .add , i16, .little , {} } },
1069 .add_32le => .{ .rmw, .{ .add , i32, .little , {} } },
1070 .add_64le => .{ .rmw, .{ .add , i64, .little , {} } },
1071 .add_16be => .{ .rmw, .{ .add , i16, .big , {} } },
1072 .add_32be => .{ .rmw, .{ .add , i32, .big , {} } },
1073 .add_64be => .{ .rmw, .{ .add , i64, .big , {} } },
1074
1075 .packed_set_rng_8 => .{ .rmw, .{ .packed_rng, u8 , native_endian, {} } },
1076 .packed_set_rng_16le => .{ .rmw, .{ .packed_rng, u16, .little , {} } },
1077 .packed_set_rng_32le => .{ .rmw, .{ .packed_rng, u32, .little , {} } },
1078 .packed_set_rng_64le => .{ .rmw, .{ .packed_rng, u64, .little , {} } },
1079 .packed_set_rng_16be => .{ .rmw, .{ .packed_rng, u16, .big , {} } },
1080 .packed_set_rng_32be => .{ .rmw, .{ .packed_rng, u32, .big , {} } },
1081 .packed_set_rng_64be => .{ .rmw, .{ .packed_rng, u64, .big , {} } },
1082 // zig fmt: on
1083 };
1084
1085 switch (class) {
1086 .new => {
1087 const op: enum {
1088 set,
1089 insert,
1090 push,
1091
1092 pub fn maxLen(comptime op: @This(), in_len: usize) usize {
1093 return switch (op) {
1094 .set => @min(in_len, max_set_len),
1095 .insert, .push => max_insert_len,
1096 };
1097 }
1098 }, const min_in_len, const data: enum {
1099 rng,
1100 zero,
1101 common,
1102 print,
1103 integer,
1104 wtf8,
1105 existing,
1106 splice,
1107 @"const",
1108 small,
1109 few,
1110 }, const data_ctx = class_ctx;
1111 const Size = enum { one, many, large };
1112 if (in.len < min_in_len) return false;
1113 if (data == .@"const" and data_ctx.len == 0) return false;
1114
1115 const splice_i = if (data == .splice) blk: {
1116 // Element zero always holds an empty input, so we do not select it
1117 if (corpus.len == 1) return false;
1118 break :blk rng.intRangeLessThanBiased(usize, 1, corpus.len);
1119 } else undefined;
1120
1121 // Only needs to be followed for set
1122 const len = switch (data) {
1123 else => switch (@as(Size, data_ctx)) {
1124 .one => 1,
1125 .many => rng.intRangeAtMostBiased(usize, 1, op.maxLen(in.len)),
1126 .large => rng.intRangeAtMostBiased(usize, 1, max_large_insert_len),
1127 },
1128 .wtf8 => undefined, // varies by size of each code unit
1129 .splice => rng.intRangeAtMostBiased(usize, 1, @min(
1130 corpus[splice_i].len,
1131 op.maxLen(in.len),
1132 )),
1133 .existing => rng.intRangeAtMostBiased(usize, 1, @min(
1134 in.len,
1135 op.maxLen(in.len),
1136 )),
1137 .@"const" => @sizeOf(@typeInfo(@TypeOf(data_ctx)).pointer.child),
1138 .small, .few => @sizeOf(data_ctx[0]),
1139 };
1140
1141 const i = switch (op) {
1142 .set => rng.uintAtMostBiased(usize, in.len - len),
1143 .insert => rng.uintAtMostBiased(usize, in.len),
1144 .push => in.len,
1145 };
1146
1147 out.appendSliceAssumeCapacity(in[0..i]);
1148 switch (data) {
1149 .rng => {
1150 var bytes: [@max(max_insert_len, max_set_len)]u8 = undefined;
1151 rng.bytes(bytes[0..len]);
1152 out.appendSliceAssumeCapacity(bytes[0..len]);
1153 },
1154 .zero => out.appendNTimesAssumeCapacity(0, len),
1155 .common => for (out.addManyAsSliceAssumeCapacity(len)) |*c| {
1156 c.* = switch (rng.int(u6)) {
1157 0 => ' ',
1158 1...10 => |x| '0' + (@as(u8, x) - 1),
1159 11...36 => |x| 'A' + (@as(u8, x) - 11),
1160 37 => '_',
1161 38...63 => |x| 'a' + (@as(u8, x) - 38),
1162 };
1163 },
1164 .print => for (out.addManyAsSliceAssumeCapacity(len)) |*c| {
1165 c.* = rng.intRangeAtMostBiased(u8, 0x20, 0x7E);
1166 },
1167 .integer => {
1168 const negative = len != 0 and rng.boolean();
1169 if (negative) {
1170 out.appendAssumeCapacity('-');
1171 }
1172
1173 for (out.addManyAsSliceAssumeCapacity(len - @intFromBool(negative))) |*c| {
1174 c.* = rng.intRangeAtMostBiased(u8, '0', '9');
1175 }
1176 },
1177 .wtf8 => {
1178 comptime assert(op != .set);
1179 var codepoints: usize = if (data_ctx == .one)
1180 1
1181 else
1182 rng.intRangeAtMostBiased(usize, 1, Mutation.max_insert_len / 4);
1183
1184 while (true) {
1185 const units1 = rng.int(u2);
1186 const value = switch (units1) {
1187 0 => rng.int(u7),
1188 1 => rng.intRangeAtMostBiased(u11, 0x000080, 0x0007FF),
1189 2 => rng.intRangeAtMostBiased(u16, 0x000800, 0x00FFFF),
1190 3 => rng.intRangeAtMostBiased(u21, 0x010000, 0x10FFFF),
1191 };
1192 const units = @as(u3, units1) + 1;
1193
1194 var buf: [4]u8 = undefined;
1195 assert(std.unicode.wtf8Encode(value, &buf) catch unreachable == units);
1196 out.appendSliceAssumeCapacity(buf[0..units]);
1197
1198 codepoints -= 1;
1199 if (codepoints == 0) break;
1200 }
1201 },
1202 .existing => {
1203 const j = rng.uintAtMostBiased(usize, in.len - len);
1204 out.appendSliceAssumeCapacity(in[j..][0..len]);
1205 },
1206 .splice => {
1207 const j = rng.uintAtMostBiased(usize, corpus[splice_i].len - len);
1208 out.appendSliceAssumeCapacity(corpus[splice_i][j..][0..len]);
1209 },
1210 .@"const" => out.appendSliceAssumeCapacity(@ptrCast(
1211 &data_ctx[rng.uintLessThanBiased(usize, data_ctx.len)],
1212 )),
1213 .small => out.appendSliceAssumeCapacity(@ptrCast(
1214 &mem.nativeTo(data_ctx[0], rng.int(SmallValue), data_ctx[1]),
1215 )),
1216 .few => out.appendSliceAssumeCapacity(@ptrCast(
1217 &fewValue(rng, data_ctx[0], data_ctx[1]),
1218 )),
1219 }
1220 switch (op) {
1221 .set => out.appendSliceAssumeCapacity(in[i + len ..]),
1222 .insert => out.appendSliceAssumeCapacity(in[i..]),
1223 .push => {},
1224 }
1225 },
1226 .remove => {
1227 if (in.len == 0) return false;
1228 const Op = enum { delete, pop };
1229 const op: Op, const max_len = class_ctx;
                // LessThan is used so we don't delete the entire input (which is unproductive
                // since an empty input has always been tried).
1232 const len = if (max_len == 1) 1 else rng.uintLessThanBiased(
1233 usize,
1234 @min(max_len + 1, in.len),
1235 );
1236 switch (op) {
1237 .delete => {
1238 const i = rng.uintAtMostBiased(usize, in.len - len);
1239 out.appendSliceAssumeCapacity(in[0..i]);
1240 out.appendSliceAssumeCapacity(in[i + len ..]);
1241 },
1242 .pop => out.appendSliceAssumeCapacity(in[0 .. in.len - len]),
1243 }
1244 },
1245 .rmw => {
1246 const Op = enum { xor, truncate, add, packed_rng };
1247 const op: Op, const T, const endian, const xor_bits = class_ctx;
1248 if (in.len < @sizeOf(T)) return false;
1249 const Log2T = math.Log2Int(T);
1250
1251 const idx = rng.uintAtMostBiased(usize, in.len - @sizeOf(T));
1252 const old = mem.readInt(T, in[idx..][0..@sizeOf(T)], endian);
1253 const new = switch (op) {
1254 .xor => old ^ fewValue(rng, T, xor_bits),
1255 .truncate => old & (@as(T, math.maxInt(T)) >> rng.int(Log2T)),
1256 .add => old +% addend: {
1257 const val = rng.int(Mutation.AddValue);
1258 break :addend if (val == 0) 1 else val;
1259 },
1260 .packed_rng => blk: {
1261 const bits = rng.int(math.Log2Int(T)) +| 1;
1262 break :blk old ^ (rng.int(T) >> bits << rng.uintAtMostBiased(Log2T, bits));
1263 },
1264 };
1265 out.appendSliceAssumeCapacity(in);
1266 mem.bytesAsValue(T, out.items[8..][idx..][0..@sizeOf(T)]).* =
1267 mem.nativeTo(T, new, endian);
1268 },
1269 .move_span => {
1270 if (in.len < 2) return false;
                // One less since moving the whole input will never change anything.
1272 const len = rng.intRangeAtMostBiased(usize, 1, @min(
1273 in.len - 1,
1274 Mutation.max_set_len,
1275 ));
1276
1277 const src = rng.uintAtMostBiased(usize, in.len - len);
1278 // This indexes into the final input
1279 const dst = blk: {
1280 const res = rng.uintAtMostBiased(usize, in.len - len - 1);
1281 break :blk res + @intFromBool(res >= src);
1282 };
1283
1284 if (src < dst) {
1285 out.appendSliceAssumeCapacity(in[0..src]);
1286 out.appendSliceAssumeCapacity(in[src + len .. dst + len]);
1287 out.appendSliceAssumeCapacity(in[src..][0..len]);
1288 out.appendSliceAssumeCapacity(in[dst + len ..]);
1289 } else {
1290 out.appendSliceAssumeCapacity(in[0..dst]);
1291 out.appendSliceAssumeCapacity(in[src..][0..len]);
1292 out.appendSliceAssumeCapacity(in[dst..src]);
1293 out.appendSliceAssumeCapacity(in[src + len ..]);
1294 }
1295 },
1296 .replicate_splice_span => {
1297 if (in.len == 0) return false;
1298 if (corpus.len == 1) return false;
1299 const from = corpus[rng.intRangeLessThanBiased(usize, 1, corpus.len)];
1300 const len = rng.uintLessThanBiased(usize, @min(in.len, from.len, max_replicate_len));
1301 const i = rng.uintAtMostBiased(usize, @min(in.len, from.len) - len);
1302 out.appendSliceAssumeCapacity(in[0..i]);
1303 out.appendSliceAssumeCapacity(from[i..][0..len]);
1304 out.appendSliceAssumeCapacity(in[i + len ..]);
1305 },
1306 }
1307 return true;
1308 }
1309};
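
// Hedged sanity check for `Mutation.fewValue` (illustrative only): the result always has at
// least one and at most `bits` bits set.
test "Mutation.fewValue bit count (illustrative)" {
    var prng: std.Random.DefaultPrng = .init(42);
    const rng = prng.random();
    for (0..100) |_| {
        const v = Mutation.fewValue(rng, u16, 6);
        try std.testing.expect(v != 0);
        try std.testing.expect(@popCount(v) <= 6);
    }
}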
1310
1311/// Like `std.ArrayList(u8)` but backed by memory mapping.
1312pub const MemoryMappedList = struct {
1313 /// Contents of the list.
1314 ///
    /// Pointers to elements in this slice are invalidated by various functions
    /// of this list in accordance with the respective documentation. In all
    /// cases, "invalidated" means that the underlying mapping has been remapped
    /// or unmapped.
1319 items: []align(std.heap.page_size_min) volatile u8,
1320 /// How many bytes this list can hold without allocating additional memory.
1321 capacity: usize,
1322 /// The file is kept open so that it can be resized.
1323 file: std.fs.File,
1324
1325 pub fn init(file: std.fs.File, length: usize, capacity: usize) !MemoryMappedList {
1326 const ptr = try std.posix.mmap(
1327 null,
1328 capacity,
1329 std.posix.PROT.READ | std.posix.PROT.WRITE,
1330 .{ .TYPE = .SHARED },
1331 file.handle,
1332 0,
1333 );
1334 return .{
1335 .file = file,
1336 .items = ptr[0..length],
1337 .capacity = capacity,
1338 };
1339 }
1340
1341 pub fn create(file: std.fs.File, length: usize, capacity: usize) !MemoryMappedList {
1342 try file.setEndPos(capacity);
1343 return init(file, length, capacity);
1344 }
1345
1346 pub fn deinit(l: *MemoryMappedList) void {
1347 l.file.close();
1348 std.posix.munmap(@volatileCast(l.items.ptr[0..l.capacity]));
1349 l.* = undefined;
1350 }
1351
1352 /// Modify the array so that it can hold at least `additional_count` **more** items.
1353 /// Invalidates element pointers if additional memory is needed.
1354 pub fn ensureUnusedCapacity(l: *MemoryMappedList, additional_count: usize) !void {
1355 return l.ensureTotalCapacity(l.items.len + additional_count);
1356 }
1357
1358 /// If the current capacity is less than `new_capacity`, this function will
1359 /// modify the array so that it can hold at least `new_capacity` items.
1360 /// Invalidates element pointers if additional memory is needed.
1361 pub fn ensureTotalCapacity(l: *MemoryMappedList, new_capacity: usize) !void {
1362 if (l.capacity >= new_capacity) return;
1363
1364 const better_capacity = growCapacity(l.capacity, new_capacity);
1365 return l.ensureTotalCapacityPrecise(better_capacity);
1366 }
1367
1368 pub fn ensureTotalCapacityPrecise(l: *MemoryMappedList, new_capacity: usize) !void {
1369 if (l.capacity >= new_capacity) return;
1370
1371 std.posix.munmap(@volatileCast(l.items.ptr[0..l.capacity]));
1372 try l.file.setEndPos(new_capacity);
1373 l.* = try init(l.file, l.items.len, new_capacity);
1374 }
1375
1376 /// Invalidates all element pointers.
1377 pub fn clearRetainingCapacity(l: *MemoryMappedList) void {
1378 l.items.len = 0;
1379 }
1380
1381 /// Append the slice of items to the list.
1382 /// Asserts that the list can hold the additional items.
1383 pub fn appendSliceAssumeCapacity(l: *MemoryMappedList, items: []const u8) void {
1384 const old_len = l.items.len;
1385 const new_len = old_len + items.len;
1386 assert(new_len <= l.capacity);
1387 l.items.len = new_len;
1388 @memcpy(l.items[old_len..][0..items.len], items);
1389 }
1390
1391 /// Extends the list by 1 element.
1392 /// Never invalidates element pointers.
1393 /// Asserts that the list can hold one additional item.
1394 pub fn appendAssumeCapacity(l: *MemoryMappedList, item: u8) void {
1395 const new_item_ptr = l.addOneAssumeCapacity();
1396 new_item_ptr.* = item;
1397 }
1398
1399 /// Increase length by 1, returning pointer to the new item.
1400 /// The returned pointer becomes invalid when the list is resized.
1401 /// Never invalidates element pointers.
1402 /// Asserts that the list can hold one additional item.
1403 pub fn addOneAssumeCapacity(l: *MemoryMappedList) *volatile u8 {
1404 assert(l.items.len < l.capacity);
1405 l.items.len += 1;
1406 return &l.items[l.items.len - 1];
1407 }
1408
1409 /// Append a value to the list `n` times.
1410 /// Never invalidates element pointers.
1411 /// The function is inline so that a comptime-known `value` parameter will
1412 /// have better memset codegen in case it has a repeated byte pattern.
1413 /// Asserts that the list can hold the additional items.
1414 pub inline fn appendNTimesAssumeCapacity(l: *MemoryMappedList, value: u8, n: usize) void {
1415 const new_len = l.items.len + n;
1416 assert(new_len <= l.capacity);
1417 @memset(l.items.ptr[l.items.len..new_len], value);
1418 l.items.len = new_len;
1419 }
1420
1421 /// Resize the array, adding `n` new elements, which have `undefined` values.
1422 /// The return value is a slice pointing to the newly allocated elements.
1423 /// Never invalidates element pointers.
1424 /// The returned pointer becomes invalid when the list is resized.
1425 /// Asserts that the list can hold the additional items.
1426 pub fn addManyAsSliceAssumeCapacity(l: *MemoryMappedList, n: usize) []volatile u8 {
1427 assert(l.items.len + n <= l.capacity);
1428 const prev_len = l.items.len;
1429 l.items.len += n;
1430 return l.items[prev_len..][0..n];
1431 }
1432
1433 /// Called when memory growth is necessary. Returns a capacity larger than
1434 /// minimum that grows super-linearly.
1435 fn growCapacity(current: usize, minimum: usize) usize {
1436 var new = current;
1437 while (true) {
1438 new = mem.alignForward(usize, new + new / 2, std.heap.page_size_max);
1439 if (new >= minimum) return new;
1440 }
1441 }
1442
1443 pub fn insertAssumeCapacity(l: *MemoryMappedList, i: usize, item: u8) void {
1444 assert(l.items.len + 1 <= l.capacity);
1445 l.items.len += 1;
1446 volatileCopyBackwards(u8, l.items[i + 1 ..], l.items[i .. l.items.len - 1]);
1447 l.items[i] = item;
1448 }
1449
1450 pub fn orderedRemove(l: *MemoryMappedList, i: usize) u8 {
        assert(i < l.items.len);
1452 const old = l.items[i];
1453 volatileCopyForwards(u8, l.items[i .. l.items.len - 1], l.items[i + 1 ..]);
1454 l.items.len -= 1;
1455 return old;
1456 }
1457};