1const builtin = @import("builtin");
2const std = @import("std");
3const fatal = std.process.fatal;
4const mem = std.mem;
5const math = std.math;
6const Allocator = mem.Allocator;
7const assert = std.debug.assert;
8const panic = std.debug.panic;
9const abi = std.Build.abi.fuzz;
10const native_endian = builtin.cpu.arch.endian();
11
12pub const std_options = std.Options{
13 .logFn = logOverride,
14};
15
16fn logOverride(
17 comptime level: std.log.Level,
18 comptime scope: @EnumLiteral(),
19 comptime format: []const u8,
20 args: anytype,
21) void {
22 const f = log_f orelse
23 panic("attempt to use log before initialization, message:\n" ++ format, args);
24 f.lock(.exclusive) catch |e| panic("failed to lock logging file: {t}", .{e});
25 defer f.unlock();
26
27 var buf: [256]u8 = undefined;
28 var fw = f.writer(&buf);
29 const end = f.getEndPos() catch |e| panic("failed to get fuzzer log file end: {t}", .{e});
30 fw.seekTo(end) catch |e| panic("failed to seek to fuzzer log file end: {t}", .{e});
31
32 const prefix1 = comptime level.asText();
33 const prefix2 = if (scope == .default) ": " else "(" ++ @tagName(scope) ++ "): ";
34 fw.interface.print(
35 "[{s}] " ++ prefix1 ++ prefix2 ++ format ++ "\n",
36 .{current_test_name orelse "setup"} ++ args,
37 ) catch panic("failed to write to fuzzer log: {t}", .{fw.err.?});
38 fw.interface.flush() catch panic("failed to write to fuzzer log: {t}", .{fw.err.?});
39}
40
41var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
42const gpa = switch (builtin.mode) {
43 .Debug => debug_allocator.allocator(),
44 .ReleaseFast, .ReleaseSmall, .ReleaseSafe => std.heap.smp_allocator,
45};
46
/// Part of `exec`, but kept separate so that it can be set before `exec` is initialized.
48var log_f: ?std.fs.File = null;
49var exec: Executable = .preinit;
50var inst: Instrumentation = .preinit;
51var fuzzer: Fuzzer = undefined;
52var current_test_name: ?[]const u8 = null;
53
54fn bitsetUsizes(elems: usize) usize {
55 return math.divCeil(usize, elems, @bitSizeOf(usize)) catch unreachable;
56}
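
// A hedged sanity check of the rounding above (illustrative only); it is phrased in terms of
// `@bitSizeOf(usize)` so it holds for any target word size.
test bitsetUsizes {
    try std.testing.expect(bitsetUsizes(1) == 1);
    try std.testing.expect(bitsetUsizes(@bitSizeOf(usize)) == 1);
    try std.testing.expect(bitsetUsizes(@bitSizeOf(usize) + 1) == 2);
}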
57
58const Executable = struct {
59 /// Tracks the hit count for each pc as updated by the process's instrumentation.
60 pc_counters: []u8,
61
62 cache_f: std.fs.Dir,
    /// Shared copy of all pcs that have been hit, stored in a memory-mapped file that can be
    /// viewed while the fuzzer is running.
65 shared_seen_pcs: MemoryMappedList,
66 /// Hash of pcs used to uniquely identify the shared coverage file
67 pc_digest: u64,
68
    /// A minimal state for this struct that instrumentation functions can operate on.
    /// Used before this structure is initialized so that instrumentation functions called
    /// early do not cause illegal behavior by reading undefined values.
72 pub const preinit: Executable = .{
73 .pc_counters = undefined, // instrumentation works off the __sancov_cntrs section
74 .cache_f = undefined,
75 .shared_seen_pcs = undefined,
76 .pc_digest = undefined,
77 };
78
79 fn getCoverageFile(cache_dir: std.fs.Dir, pcs: []const usize, pc_digest: u64) MemoryMappedList {
80 const pc_bitset_usizes = bitsetUsizes(pcs.len);
81 const coverage_file_name = std.fmt.hex(pc_digest);
82 comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
83 comptime assert(abi.SeenPcsHeader.trailing[1] == .pc_addr);
84
85 var v = cache_dir.makeOpenPath("v", .{}) catch |e|
86 panic("failed to create directory 'v': {t}", .{e});
87 defer v.close();
88 const coverage_file, const populate = if (v.createFile(&coverage_file_name, .{
89 .read = true,
90 // If we create the file, we want to block other processes while we populate it
91 .lock = .exclusive,
92 .exclusive = true,
93 })) |f|
94 .{ f, true }
95 else |e| switch (e) {
96 error.PathAlreadyExists => .{ v.openFile(&coverage_file_name, .{
97 .mode = .read_write,
98 .lock = .shared,
99 }) catch |e2| panic(
100 "failed to open existing coverage file '{s}': {t}",
101 .{ &coverage_file_name, e2 },
102 ), false },
103 else => panic("failed to create coverage file '{s}': {t}", .{ &coverage_file_name, e }),
104 };
105
106 const coverage_file_len = @sizeOf(abi.SeenPcsHeader) +
107 pc_bitset_usizes * @sizeOf(usize) +
108 pcs.len * @sizeOf(usize);
109
110 if (populate) {
111 defer coverage_file.lock(.shared) catch |e| panic(
112 "failed to demote lock for coverage file '{s}': {t}",
113 .{ &coverage_file_name, e },
114 );
115 var map = MemoryMappedList.create(coverage_file, 0, coverage_file_len) catch |e| panic(
116 "failed to init memory map for coverage file '{s}': {t}",
117 .{ &coverage_file_name, e },
118 );
119 map.appendSliceAssumeCapacity(@ptrCast(&abi.SeenPcsHeader{
120 .n_runs = 0,
121 .unique_runs = 0,
122 .pcs_len = pcs.len,
123 }));
124 map.appendNTimesAssumeCapacity(0, pc_bitset_usizes * @sizeOf(usize));
125 // Relocations have been applied to `pcs` so it contains runtime addresses (with slide
126 // applied). We need to translate these to the virtual addresses as on disk.
127 for (pcs) |pc| {
128 const pc_vaddr = fuzzer_unslide_address(pc);
129 map.appendSliceAssumeCapacity(@ptrCast(&pc_vaddr));
130 }
131 return map;
132 } else {
133 const size = coverage_file.getEndPos() catch |e| panic(
134 "failed to stat coverage file '{s}': {t}",
135 .{ &coverage_file_name, e },
136 );
137 if (size != coverage_file_len) panic(
138 "incompatible existing coverage file '{s}' (differing lengths: {} != {})",
139 .{ &coverage_file_name, size, coverage_file_len },
140 );
141
142 const map = MemoryMappedList.init(
143 coverage_file,
144 coverage_file_len,
145 coverage_file_len,
146 ) catch |e| panic(
147 "failed to init memory map for coverage file '{s}': {t}",
148 .{ &coverage_file_name, e },
149 );
150
            const seen_pcs_header: *const abi.SeenPcsHeader = @ptrCast(@volatileCast(map.items.ptr));
152 if (seen_pcs_header.pcs_len != pcs.len) panic(
153 "incompatible existing coverage file '{s}' (differing pcs length: {} != {})",
154 .{ &coverage_file_name, seen_pcs_header.pcs_len, pcs.len },
155 );
            // `pcs` holds runtime (slid) addresses while the file stores unslid virtual
            // addresses, so unslide before comparing.
            for (seen_pcs_header.pcAddrs(), pcs, 0..) |stored_pc, pc, i| {
                if (stored_pc != fuzzer_unslide_address(pc)) panic(
                    "incompatible existing coverage file '{s}' (differing pc at index {d}: {x} != {x})",
                    .{ &coverage_file_name, i, stored_pc, fuzzer_unslide_address(pc) },
                );
            }
160
161 return map;
162 }
163 }
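
    // Coverage file layout assumed above (mirrors the `abi.SeenPcsHeader.trailing` asserts in
    // `getCoverageFile`; offsets are illustrative):
    //
    //   [0 .. @sizeOf(abi.SeenPcsHeader))              header: n_runs, unique_runs, pcs_len
    //   [.. + bitsetUsizes(pcs.len) * @sizeOf(usize))  bitset of pcs seen by any process
    //   [.. + pcs.len * @sizeOf(usize))                unslid virtual address of each pc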
164
165 pub fn init(cache_dir_path: []const u8) Executable {
166 var self: Executable = undefined;
167
168 const cache_dir = std.fs.cwd().makeOpenPath(cache_dir_path, .{}) catch |e| panic(
169 "failed to open directory '{s}': {t}",
170 .{ cache_dir_path, e },
171 );
172 log_f = cache_dir.createFile("tmp/libfuzzer.log", .{ .truncate = false }) catch |e|
173 panic("failed to create file 'tmp/libfuzzer.log': {t}", .{e});
174 self.cache_f = cache_dir.makeOpenPath("f", .{}) catch |e|
175 panic("failed to open directory 'f': {t}", .{e});
176
177 // Linkers are expected to automatically add symbols prefixed with these for the start and
178 // end of sections whose names are valid C identifiers.
179 const ofmt = builtin.object_format;
180 const section_start_prefix, const section_end_prefix = switch (ofmt) {
181 .elf => .{ "__start_", "__stop_" },
182 .macho => .{ "\x01section$start$__DATA$", "\x01section$end$__DATA$" },
183 else => @compileError("unsupported fuzzing object format '" ++ @tagName(ofmt) ++ "'"),
184 };
185
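        // On ELF, for example, the concatenations below resolve to the linker-provided
        // `__start___sancov_cntrs` / `__stop___sancov_cntrs` symbols delimiting the counters
        // section (illustrative; the Mach-O prefixes follow the same pattern).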
186 self.pc_counters = blk: {
187 const pc_counters_start_name = section_start_prefix ++ "__sancov_cntrs";
188 const pc_counters_start = @extern([*]u8, .{
189 .name = pc_counters_start_name,
190 .linkage = .weak,
191 }) orelse panic("missing {s} symbol", .{pc_counters_start_name});
192
193 const pc_counters_end_name = section_end_prefix ++ "__sancov_cntrs";
194 const pc_counters_end = @extern([*]u8, .{
195 .name = pc_counters_end_name,
196 .linkage = .weak,
197 }) orelse panic("missing {s} symbol", .{pc_counters_end_name});
198
199 break :blk pc_counters_start[0 .. pc_counters_end - pc_counters_start];
200 };
201
202 const pcs = blk: {
203 const pcs_start_name = section_start_prefix ++ "__sancov_pcs1";
204 const pcs_start = @extern([*]usize, .{
205 .name = pcs_start_name,
206 .linkage = .weak,
207 }) orelse panic("missing {s} symbol", .{pcs_start_name});
208
209 const pcs_end_name = section_end_prefix ++ "__sancov_pcs1";
210 const pcs_end = @extern([*]usize, .{
211 .name = pcs_end_name,
212 .linkage = .weak,
213 }) orelse panic("missing {s} symbol", .{pcs_end_name});
214
215 break :blk pcs_start[0 .. pcs_end - pcs_start];
216 };
217
218 if (self.pc_counters.len != pcs.len) panic(
219 "pc counters length and pcs length do not match ({} != {})",
220 .{ self.pc_counters.len, pcs.len },
221 );
222
223 self.pc_digest = digest: {
224 // Relocations have been applied to `pcs` so it contains runtime addresses (with slide
225 // applied). We need to translate these to the virtual addresses as on disk.
226 var h: std.hash.Wyhash = .init(0);
227 for (pcs) |pc| {
228 const pc_vaddr = fuzzer_unslide_address(pc);
229 h.update(@ptrCast(&pc_vaddr));
230 }
231 break :digest h.final();
232 };
233 self.shared_seen_pcs = getCoverageFile(cache_dir, pcs, self.pc_digest);
234
235 return self;
236 }
237
238 pub fn pcBitsetIterator(self: Executable) PcBitsetIterator {
239 return .{ .pc_counters = self.pc_counters };
240 }
241
    /// Iterates over `pc_counters`, yielding one `usize` bitset per chunk in which each bit
    /// indicates whether the corresponding counter has been hit.
243 pub const PcBitsetIterator = struct {
244 index: usize = 0,
245 pc_counters: []u8,
246
247 pub fn next(self: *PcBitsetIterator) usize {
248 const rest = self.pc_counters[self.index..];
249 if (rest.len >= @bitSizeOf(usize)) {
250 defer self.index += @bitSizeOf(usize);
251 const V = @Vector(@bitSizeOf(usize), u8);
252 return @as(usize, @bitCast(@as(V, @splat(0)) != rest[0..@bitSizeOf(usize)].*));
253 } else if (rest.len != 0) {
254 defer self.index += rest.len;
255 var res: usize = 0;
256 for (0.., rest) |bit_index, byte| {
257 res |= @shlExact(@as(usize, @intFromBool(byte != 0)), @intCast(bit_index));
258 }
259 return res;
260 } else unreachable;
261 }
262 };
263};
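
// Illustrative, hedged sketch of how `Executable.PcBitsetIterator` condenses 8-bit counters
// into one hit bit per pc; assumes at least `@bitSizeOf(usize)` counters so the vector path
// is taken.
test "PcBitsetIterator chunking (illustrative)" {
    var counters = [_]u8{0} ** @bitSizeOf(usize);
    counters[0] = 1;
    counters[3] = 7;
    var it: Executable.PcBitsetIterator = .{ .pc_counters = &counters };
    try std.testing.expect(it.next() == 0b1001);
}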
264
/// Data gathered from instrumentation functions.
/// Separate from Executable since its state is resettable and changes over time.
/// Separate from Fuzzer since it may be needed before fuzzing starts.
268const Instrumentation = struct {
    /// Bitset of seen pcs across all runs, excluding fresh pcs.
    /// This is kept separate from shared_seen_pcs because other fuzzing processes are likely
    /// using that file as well, which causes contention and sets pcs unrelated to our campaign.
272 seen_pcs: []usize,
273
274 /// Stores a fresh input's new pcs
275 fresh_pcs: []usize,
276
    /// Pcs from which __sanitizer_cov_trace_switch and __sanitizer_cov_trace_const_cmpX have
    /// been called and whose constant operands have already been added to const_valsX.
279 const_pcs: std.AutoArrayHashMapUnmanaged(usize, void) = .empty,
    /// Values that have been constant operands in comparisons and switch cases.
    /// There may be duplicates in these arrays if they came from different addresses; that is
    /// fine, as such values are likely more important and hence more likely to be selected.
283 const_vals2: std.ArrayList(u16) = .empty,
284 const_vals4: std.ArrayList(u32) = .empty,
285 const_vals8: std.ArrayList(u64) = .empty,
286 const_vals16: std.ArrayList(u128) = .empty,
287
    /// A minimal state for this struct that instrumentation functions can operate on.
    /// Used before this structure is initialized so that instrumentation functions called
    /// early do not cause illegal behavior by reading undefined values.
291 pub const preinit: Instrumentation = .{
292 .seen_pcs = undefined, // currently only updated by `Fuzzer`
293 .fresh_pcs = undefined,
294 };
295
296 pub fn depreinit(self: *Instrumentation) void {
297 self.const_vals2.deinit(gpa);
298 self.const_vals4.deinit(gpa);
299 self.const_vals8.deinit(gpa);
300 self.const_vals16.deinit(gpa);
301 self.* = undefined;
302 }
303
304 pub fn init() Instrumentation {
305 const pc_bitset_usizes = bitsetUsizes(exec.pc_counters.len);
306 const alloc_usizes = pc_bitset_usizes * 2;
307 const buf = gpa.alloc(u8, alloc_usizes * @sizeOf(usize)) catch @panic("OOM");
308 var fba_ctx: std.heap.FixedBufferAllocator = .init(buf);
309 const fba = fba_ctx.allocator();
310
311 var self: Instrumentation = .{
312 .seen_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable,
313 .fresh_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable,
314 };
315 self.reset();
316 return self;
317 }
318
319 pub fn reset(self: *Instrumentation) void {
320 @memset(self.seen_pcs, 0);
321 @memset(self.fresh_pcs, 0);
322 self.const_pcs.clearRetainingCapacity();
323 self.const_vals2.clearRetainingCapacity();
324 self.const_vals4.clearRetainingCapacity();
325 self.const_vals8.clearRetainingCapacity();
326 self.const_vals16.clearRetainingCapacity();
327 }
328
    /// Returns whether the pc has already been seen; if not, it is marked as seen.
330 pub fn constPcSeen(self: *Instrumentation, pc: usize) bool {
331 return (self.const_pcs.getOrPut(gpa, pc) catch @panic("OOM")).found_existing;
332 }
333
334 pub fn isFresh(self: *Instrumentation) bool {
335 var hit_pcs = exec.pcBitsetIterator();
336 for (self.seen_pcs) |seen_pcs| {
337 if (hit_pcs.next() & ~seen_pcs != 0) return true;
338 }
339
340 return false;
341 }
342
343 /// Updates `fresh_pcs`
344 pub fn setFresh(self: *Instrumentation) void {
345 var hit_pcs = exec.pcBitsetIterator();
346 for (self.seen_pcs, self.fresh_pcs) |seen_pcs, *fresh_pcs| {
347 fresh_pcs.* = hit_pcs.next() & ~seen_pcs;
348 }
349 }
350
    /// Returns whether `exec.pc_counters` is a superset of `fresh_pcs`.
352 pub fn atleastFresh(self: *Instrumentation) bool {
353 var hit_pcs = exec.pcBitsetIterator();
354 for (self.fresh_pcs) |fresh_pcs| {
355 if (fresh_pcs & hit_pcs.next() != fresh_pcs) return false;
356 }
357 return true;
358 }
359
    /// Updates the local and shared seen-pc sets based on `fresh_pcs`.
361 fn updateSeen(self: *Instrumentation) void {
362 comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
363 const shared_seen_pcs: [*]volatile usize = @ptrCast(
364 exec.shared_seen_pcs.items[@sizeOf(abi.SeenPcsHeader)..].ptr,
365 );
366
367 for (self.seen_pcs, shared_seen_pcs, self.fresh_pcs) |*seen, *shared_seen, fresh| {
368 seen.* |= fresh;
369 if (fresh != 0)
370 _ = @atomicRmw(usize, shared_seen, .Or, fresh, .monotonic);
371 }
372 }
373};
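
// Worked example of the bit algebra above (illustrative): with seen = 0b0011 and a run whose
// counters hit 0b0110, `isFresh` is true because 0b0110 & ~0b0011 == 0b0100; `setFresh` then
// records fresh = 0b0100 and `updateSeen` ORs it back in, leaving seen = 0b0111.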
374
375const Fuzzer = struct {
376 arena_ctx: std.heap.ArenaAllocator = .init(gpa),
377 rng: std.Random.DefaultPrng = .init(0),
378 test_one: abi.TestOne,
379 /// The next input that will be given to the testOne function. When the
380 /// current process crashes, this memory-mapped file is used to recover the
381 /// input.
382 input: MemoryMappedList,
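
    // Assumed layout of the mapped `in` file, as written by `run` and read back in `init`:
    //
    //   bytes [0..8)        input length as a little-endian usize
    //   bytes [8..8 + len)  the input itself
    //
    // so a crashed run's input can be recovered from disk afterwards.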
383
    /// Minimized past inputs leading to new pc hits.
    /// These are randomly mutated in round-robin fashion.
    /// Element zero is always an empty input. It is guaranteed that no other elements are empty.
387 corpus: std.ArrayList([]const u8),
388 corpus_pos: usize,
389 /// List of past mutations that have led to new inputs. This way, the mutations that are the
390 /// most effective are the most likely to be selected again. Starts with one of each mutation.
391 mutations: std.ArrayList(Mutation) = .empty,
392
393 /// Filesystem directory containing found inputs for future runs
394 corpus_dir: std.fs.Dir,
395 corpus_dir_idx: usize = 0,
396
397 pub fn init(test_one: abi.TestOne, unit_test_name: []const u8) Fuzzer {
398 var self: Fuzzer = .{
399 .test_one = test_one,
400 .input = undefined,
401 .corpus = .empty,
402 .corpus_pos = 0,
403 .mutations = .empty,
404 .corpus_dir = undefined,
405 };
406 const arena = self.arena_ctx.allocator();
407
408 self.corpus_dir = exec.cache_f.makeOpenPath(unit_test_name, .{}) catch |e|
409 panic("failed to open directory '{s}': {t}", .{ unit_test_name, e });
410 self.input = in: {
411 const f = self.corpus_dir.createFile("in", .{
412 .read = true,
413 .truncate = false,
                // In case any other fuzz tests are running under the same test name,
                // the input file is exclusively locked to ensure only one proceeds.
416 .lock = .exclusive,
417 .lock_nonblocking = true,
418 }) catch |e| switch (e) {
419 error.WouldBlock => @panic("input file 'in' is in use by another fuzzing process"),
420 else => panic("failed to create input file 'in': {t}", .{e}),
421 };
422 const size = f.getEndPos() catch |e| panic("failed to stat input file 'in': {t}", .{e});
423 const map = (if (size < std.heap.page_size_max)
424 MemoryMappedList.create(f, 8, std.heap.page_size_max)
425 else
426 MemoryMappedList.init(f, size, size)) catch |e|
427 panic("failed to memory map input file 'in': {t}", .{e});
428
429 // Perform a dry-run of the stored input if there was one in case it might reproduce a
430 // crash.
431 const old_in_len = mem.littleToNative(usize, mem.bytesAsValue(usize, map.items[0..8]).*);
            if (size >= 8 and old_in_len != 0 and old_in_len <= map.items.len - 8) {
433 test_one(.fromSlice(@volatileCast(map.items[8..][0..old_in_len])));
434 }
435
436 break :in map;
437 };
438 inst.reset();
439
440 self.mutations.appendSlice(gpa, std.meta.tags(Mutation)) catch @panic("OOM");
441 // Ensure there is never an empty corpus. Additionally, an empty input usually leads to
442 // new inputs.
443 self.addInput(&.{});
444
445 while (true) {
446 var name_buf: [@sizeOf(usize) * 2]u8 = undefined;
447 const bytes = self.corpus_dir.readFileAlloc(
448 std.fmt.bufPrint(&name_buf, "{x}", .{self.corpus_dir_idx}) catch unreachable,
449 arena,
450 .unlimited,
451 ) catch |e| switch (e) {
452 error.FileNotFound => break,
453 else => panic("failed to read corpus file '{x}': {t}", .{ self.corpus_dir_idx, e }),
454 };
455 // No corpus file of length zero will ever be created
456 if (bytes.len == 0)
457 panic("corrupt corpus file '{x}' (len of zero)", .{self.corpus_dir_idx});
458 self.addInput(bytes);
459 self.corpus_dir_idx += 1;
460 }
461
462 return self;
463 }
464
465 pub fn deinit(self: *Fuzzer) void {
466 self.input.deinit();
467 self.corpus.deinit(gpa);
468 self.mutations.deinit(gpa);
469 self.corpus_dir.close();
470 self.arena_ctx.deinit();
471 self.* = undefined;
472 }
473
474 pub fn addInput(self: *Fuzzer, bytes: []const u8) void {
475 self.corpus.append(gpa, bytes) catch @panic("OOM");
476 self.input.clearRetainingCapacity();
477 self.input.ensureTotalCapacity(8 + bytes.len) catch |e|
478 panic("could not resize shared input file: {t}", .{e});
479 self.input.items.len = 8;
480 self.input.appendSliceAssumeCapacity(bytes);
481 self.run();
482 inst.setFresh();
483 inst.updateSeen();
484 }
485
    /// Assumes `fresh_pcs` corresponds to the current input.
487 fn minimizeInput(self: *Fuzzer) void {
        // The minimization technique is kept relatively simple: we sequentially try to remove
        // each byte and check that the new pcs and memory loads are still hit.
490 var i = self.input.items.len;
491 while (i != 8) {
492 i -= 1;
493 const old = self.input.orderedRemove(i);
494
495 @memset(exec.pc_counters, 0);
496 self.run();
497
498 if (!inst.atleastFresh()) {
499 self.input.insertAssumeCapacity(i, old);
500 } else {
501 // This removal may have led to new pcs or memory loads being hit, so we need to
502 // update them to avoid duplicates.
503 inst.setFresh();
504 }
505 }
506 }
507
508 fn run(self: *Fuzzer) void {
509 // `pc_counters` is not cleared since only new hits are relevant.
510
511 mem.bytesAsValue(usize, self.input.items[0..8]).* =
512 mem.nativeToLittle(usize, self.input.items.len - 8);
513 self.test_one(.fromSlice(@volatileCast(self.input.items[8..])));
514
515 const header = mem.bytesAsValue(
516 abi.SeenPcsHeader,
517 exec.shared_seen_pcs.items[0..@sizeOf(abi.SeenPcsHeader)],
518 );
519 _ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic);
520 }
521
522 pub fn cycle(self: *Fuzzer) void {
523 const input = self.corpus.items[self.corpus_pos];
524 self.corpus_pos += 1;
525 if (self.corpus_pos == self.corpus.items.len)
526 self.corpus_pos = 0;
527
528 const rng = self.rng.random();
529 const m = while (true) {
530 const m = self.mutations.items[rng.uintLessThanBiased(usize, self.mutations.items.len)];
531 if (!m.mutate(
532 rng,
533 input,
534 &self.input,
535 self.corpus.items,
536 inst.const_vals2.items,
537 inst.const_vals4.items,
538 inst.const_vals8.items,
539 inst.const_vals16.items,
540 )) continue;
541 break m;
542 };
543
544 self.run();
545
546 if (inst.isFresh()) {
547 @branchHint(.unlikely);
548
549 const header = mem.bytesAsValue(
550 abi.SeenPcsHeader,
551 exec.shared_seen_pcs.items[0..@sizeOf(abi.SeenPcsHeader)],
552 );
553 _ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic);
554
555 inst.setFresh();
556 self.minimizeInput();
557 inst.updateSeen();
558
            // An empty input has always been tried, so if an empty input is fresh then the
            // test has to be non-deterministic. This has to be checked because duplicate empty
            // entries are not allowed in the corpus.
562 if (self.input.items.len - 8 == 0) {
563 std.log.warn("non-deterministic test (empty input produces different hits)", .{});
564 _ = @atomicRmw(usize, &header.unique_runs, .Sub, 1, .monotonic);
565 return;
566 }
567
568 const arena = self.arena_ctx.allocator();
569 const bytes = arena.dupe(u8, @volatileCast(self.input.items[8..])) catch @panic("OOM");
570
571 self.corpus.append(gpa, bytes) catch @panic("OOM");
572 self.mutations.appendNTimes(gpa, m, 6) catch @panic("OOM");
573
574 // Write new corpus to cache
575 var name_buf: [@sizeOf(usize) * 2]u8 = undefined;
576 self.corpus_dir.writeFile(.{
577 .sub_path = std.fmt.bufPrint(
578 &name_buf,
579 "{x}",
580 .{self.corpus_dir_idx},
581 ) catch unreachable,
582 .data = bytes,
583 }) catch |e| panic(
584 "failed to write corpus file '{x}': {t}",
585 .{ self.corpus_dir_idx, e },
586 );
587 self.corpus_dir_idx += 1;
588 }
589 }
590};
591
592/// Instrumentation must not be triggered before this function is called
593export fn fuzzer_init(cache_dir_path: abi.Slice) void {
594 inst.depreinit();
595 exec = .init(cache_dir_path.toSlice());
596 inst = .init();
597}
598
599/// Invalid until `fuzzer_init` is called.
600export fn fuzzer_coverage() abi.Coverage {
601 const coverage_id = exec.pc_digest;
602 const header: *const abi.SeenPcsHeader = @ptrCast(@volatileCast(exec.shared_seen_pcs.items.ptr));
603
604 var seen_count: usize = 0;
605 for (header.seenBits()) |chunk| {
606 seen_count += @popCount(chunk);
607 }
608
609 return .{
610 .id = coverage_id,
611 .runs = header.n_runs,
612 .unique = header.unique_runs,
613 .seen = seen_count,
614 };
615}
616
617/// fuzzer_init must be called beforehand
618export fn fuzzer_init_test(test_one: abi.TestOne, unit_test_name: abi.Slice) void {
619 current_test_name = unit_test_name.toSlice();
620 fuzzer = .init(test_one, unit_test_name.toSlice());
621}
622
/// fuzzer_init_test must be called beforehand.
/// The caller owns the memory of `bytes` and must not free it until the fuzzer is finished.
625export fn fuzzer_new_input(bytes: abi.Slice) void {
626 // An entry of length zero is always added and duplicates of it are not allowed.
627 if (bytes.len != 0)
628 fuzzer.addInput(bytes.toSlice());
629}
630
631/// fuzzer_init_test must be called first
632export fn fuzzer_main(limit_kind: abi.LimitKind, amount: u64) void {
633 switch (limit_kind) {
634 .forever => while (true) fuzzer.cycle(),
635 .iterations => for (0..amount) |_| fuzzer.cycle(),
636 }
637}
638
639export fn fuzzer_unslide_address(addr: usize) usize {
    const si = std.debug.getSelfDebugInfo() catch |err|
        std.debug.panic("failed to get self debug info: {t}", .{err});
641 const slide = si.getModuleSlide(std.debug.getDebugInfoAllocator(), addr) catch |err| {
642 std.debug.panic("failed to find virtual address slide: {t}", .{err});
643 };
644 return addr - slide;
645}
646
647/// Helps determine run uniqueness in the face of recursion.
648/// Currently not used by the fuzzer.
649export threadlocal var __sancov_lowest_stack: usize = 0;
650
/// Inlined so that `@returnAddress` resolves to the sanitizer hook's return address,
/// i.e. the pc of the instrumented comparison.
inline fn genericConstCmp(comptime T: type, val: T, comptime const_vals_field: []const u8) void {
653 if (!inst.constPcSeen(@returnAddress())) {
654 @branchHint(.unlikely);
655 @field(inst, const_vals_field).append(gpa, val) catch @panic("OOM");
656 }
657}
658
659export fn __sanitizer_cov_trace_const_cmp1(const_arg: u8, arg: u8) void {
660 _ = const_arg;
661 _ = arg;
662}
663
664export fn __sanitizer_cov_trace_const_cmp2(const_arg: u16, arg: u16) void {
665 _ = arg;
666 genericConstCmp(u16, const_arg, "const_vals2");
667}
668
669export fn __sanitizer_cov_trace_const_cmp4(const_arg: u32, arg: u32) void {
670 _ = arg;
671 genericConstCmp(u32, const_arg, "const_vals4");
672}
673
674export fn __sanitizer_cov_trace_const_cmp8(const_arg: u64, arg: u64) void {
675 _ = arg;
676 genericConstCmp(u64, const_arg, "const_vals8");
677}
678
679export fn __sanitizer_cov_trace_switch(val: u64, cases: [*]const u64) void {
680 _ = val;
681 if (!inst.constPcSeen(@returnAddress())) {
682 @branchHint(.unlikely);
683 const case_bits = cases[1];
684 const cases_slice = cases[2..][0..cases[0]];
685 switch (case_bits) {
            // Cases of 8 bits or fewer are ignored because random byte mutations are
            // likely to produce them anyway.
687 0...8 => {},
688 9...16 => for (cases_slice) |c|
689 inst.const_vals2.append(gpa, @truncate(c)) catch @panic("OOM"),
690 17...32 => for (cases_slice) |c|
691 inst.const_vals4.append(gpa, @truncate(c)) catch @panic("OOM"),
692 33...64 => for (cases_slice) |c|
693 inst.const_vals8.append(gpa, @truncate(c)) catch @panic("OOM"),
694 else => {}, // Should be impossible
695 }
696 }
697}
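
// Illustrative note: the constants harvested by the `__sanitizer_cov_trace_const_cmp*` and
// `__sanitizer_cov_trace_switch` hooks above feed the `set_const_*`, `insert_const_*`, and
// `push_const_*` mutations, so literals compared against in the code under test can be
// reproduced verbatim in generated inputs.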
698
699export fn __sanitizer_cov_trace_cmp1(arg1: u8, arg2: u8) void {
700 _ = arg1;
701 _ = arg2;
702}
703
704export fn __sanitizer_cov_trace_cmp2(arg1: u16, arg2: u16) void {
705 _ = arg1;
706 _ = arg2;
707}
708
709export fn __sanitizer_cov_trace_cmp4(arg1: u32, arg2: u32) void {
710 _ = arg1;
711 _ = arg2;
712}
713
714export fn __sanitizer_cov_trace_cmp8(arg1: u64, arg2: u64) void {
715 _ = arg1;
716 _ = arg2;
717}
718
719export fn __sanitizer_cov_trace_pc_indir(callee: usize) void {
    // Not valuable because we already have pc tracing via 8-bit counters.
721 _ = callee;
722}
723export fn __sanitizer_cov_8bit_counters_init(start: usize, end: usize) void {
724 // clang will emit a call to this function when compiling with code coverage instrumentation.
725 // however, fuzzer_init() does not need this information since it directly reads from the
726 // symbol table.
727 _ = start;
728 _ = end;
729}
730export fn __sanitizer_cov_pcs_init(start: usize, end: usize) void {
731 // clang will emit a call to this function when compiling with code coverage instrumentation.
732 // however, fuzzer_init() does not need this information since it directly reads from the
733 // symbol table.
734 _ = start;
735 _ = end;
736}
737
738/// Copy all of source into dest at position 0.
739/// If the slices overlap, dest.ptr must be <= src.ptr.
740fn volatileCopyForwards(comptime T: type, dest: []volatile T, source: []const volatile T) void {
741 for (dest, source) |*d, s| d.* = s;
742}
743
744/// Copy all of source into dest at position 0.
745/// If the slices overlap, dest.ptr must be >= src.ptr.
746fn volatileCopyBackwards(comptime T: type, dest: []volatile T, source: []const volatile T) void {
747 var i = source.len;
748 while (i > 0) {
749 i -= 1;
750 dest[i] = source[i];
751 }
752}
753
754const Mutation = enum {
    /// Applies to .insert_*_span and .push_*_span.
    /// For WTF-8, this limits code units, not code points.
    const max_insert_len = 12;
    /// Applies to .insert_large_*_span and .push_large_*_span.
    /// 4096 is used as it is a common sector size.
    const max_large_insert_len = 4096;
    /// Applies to .delete_span and .pop_span.
    const max_delete_len = 16;
    /// Applies to .set_*_span, .move_span, and .set_existing_span.
    const max_set_len = 12;
765 const max_replicate_len = 64;
766 const AddValue = i6;
767 const SmallValue = i10;
768
769 delete_byte,
770 delete_span,
771 /// Removes the last byte from the input
772 pop_byte,
773 pop_span,
774 /// Inserts a group of bytes which is already in the input and removes the original copy.
775 move_span,
776 /// Replaces a group of bytes in the input with another group of bytes in the input
777 set_existing_span,
778 insert_existing_span,
779 push_existing_span,
780 set_rng_byte,
781 set_rng_span,
782 insert_rng_byte,
783 insert_rng_span,
784 /// Adds a byte to the end of the input
785 push_rng_byte,
786 push_rng_span,
787 set_zero_byte,
788 set_zero_span,
789 insert_zero_byte,
790 insert_zero_span,
791 push_zero_byte,
792 push_zero_span,
    /// Appends a large run of zeros to the end of the input.
    /// This is intended to work with fuzz tests that require data in (large) blocks.
    push_large_zero_span,
    /// Inserts a group of ASCII printable characters.
    insert_print_span,
    /// Inserts a group of characters from a...z, A...Z, 0...9, _, and ' '.
    insert_common_span,
    /// Inserts a group of ASCII digits, possibly preceded by a `-`.
    insert_integer,
    /// Code unit lengths are evenly distributed between one and four.
    insert_wtf8_char,
    insert_wtf8_span,
    /// Inserts a group of bytes from another input.
    insert_splice_span,
    // utf16 is not yet included since insertion of random bytes should adequately check
    // BMP characters, surrogate handling, and occasionally characters outside of the BMP.
    set_print_span,
    set_common_span,
    set_splice_span,
    /// Similar to set_splice_span, but the bytes are copied to the same index instead of a
    /// random one.
813 replicate_splice_span,
814 push_print_span,
815 push_common_span,
816 push_integer,
817 push_wtf8_char,
818 push_wtf8_span,
819 push_splice_span,
    /// Clears a random number of high bits of a byte.
821 truncate_8,
822 truncate_16le,
823 truncate_16be,
824 truncate_32le,
825 truncate_32be,
826 truncate_64le,
827 truncate_64be,
828 /// Flips a random bit
829 xor_1,
    /// Flips up to three bits of a byte, biased toward fewer bits.
    xor_few_8,
    /// Flips up to six bits of a 16-bit value, biased toward fewer bits.
    xor_few_16,
    /// Flips up to nine bits of a 32-bit value, biased toward fewer bits.
    xor_few_32,
    /// Flips up to twelve bits of a 64-bit value, biased toward fewer bits.
    xor_few_64,
    /// Adds a value of type AddValue to a byte.
839 add_8,
840 add_16le,
841 add_16be,
842 add_32le,
843 add_32be,
844 add_64le,
845 add_64be,
846 /// Sets a 16-bit little-endian value to a value of type SmallValue
847 set_small_16le,
848 set_small_16be,
849 set_small_32le,
850 set_small_32be,
851 set_small_64le,
852 set_small_64be,
853 insert_small_16le,
854 insert_small_16be,
855 insert_small_32le,
856 insert_small_32be,
857 insert_small_64le,
858 insert_small_64be,
859 push_small_16le,
860 push_small_16be,
861 push_small_32le,
862 push_small_32be,
863 push_small_64le,
864 push_small_64be,
865 set_const_16,
866 set_const_32,
867 set_const_64,
868 set_const_128,
869 insert_const_16,
870 insert_const_32,
871 insert_const_64,
872 insert_const_128,
873 push_const_16,
874 push_const_32,
875 push_const_64,
876 push_const_128,
    /// Sets a byte with up to three bits set, biased toward fewer bits.
    set_few_8,
    /// Sets a 16-bit value with up to six bits set, biased toward fewer bits.
    set_few_16,
    /// Sets a 32-bit value with up to nine bits set, biased toward fewer bits.
    set_few_32,
    /// Sets a 64-bit value with up to twelve bits set, biased toward fewer bits.
    set_few_64,
885 insert_few_8,
886 insert_few_16,
887 insert_few_32,
888 insert_few_64,
889 push_few_8,
890 push_few_16,
891 push_few_32,
892 push_few_64,
    /// Randomizes a random contiguous group of bits in a byte.
894 packed_set_rng_8,
895 packed_set_rng_16le,
896 packed_set_rng_16be,
897 packed_set_rng_32le,
898 packed_set_rng_32be,
899 packed_set_rng_64le,
900 packed_set_rng_64be,
901
902 fn fewValue(rng: std.Random, T: type, comptime bits: u16) T {
903 var result: T = 0;
904 var remaining_bits = rng.intRangeAtMostBiased(u16, 1, bits);
905 while (remaining_bits > 0) {
906 result |= @shlExact(@as(T, 1), rng.int(math.Log2Int(T)));
907 remaining_bits -= 1;
908 }
909 return result;
910 }
911
    /// Returns whether the mutation was applicable to the input.
913 pub fn mutate(
914 mutation: Mutation,
915 rng: std.Random,
916 in: []const u8,
917 out: *MemoryMappedList,
918 corpus: []const []const u8,
919 const_vals2: []const u16,
920 const_vals4: []const u32,
921 const_vals8: []const u64,
922 const_vals16: []const u128,
923 ) bool {
924 out.clearRetainingCapacity();
925 const new_capacity = 8 + in.len + @max(
            16, // 128-bit constant value
927 Mutation.max_insert_len,
928 Mutation.max_large_insert_len,
929 );
930 out.ensureTotalCapacity(new_capacity) catch |e|
931 panic("could not resize shared input file: {t}", .{e});
932 out.items.len = 8; // Length field
933
934 const applied = switch (mutation) {
935 inline else => |m| m.comptimeMutate(
936 rng,
937 in,
938 out,
939 corpus,
940 const_vals2,
941 const_vals4,
942 const_vals8,
943 const_vals16,
944 ),
945 };
946 if (!applied)
947 assert(out.items.len == 8)
948 else
949 assert(out.items.len <= new_capacity);
950 return applied;
951 }
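
    // Hedged usage sketch (mirrors the retry loop in `Fuzzer.cycle`; the names here are
    // illustrative): callers keep drawing mutations until one reports that it applied.
    //
    //   const m = while (true) {
    //       const m = mutations[rng.uintLessThanBiased(usize, mutations.len)];
    //       if (m.mutate(rng, in, &out, corpus, cv2, cv4, cv8, cv16)) break m;
    //   };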
952
953 /// Assumes out has already been cleared
954 fn comptimeMutate(
955 comptime mutation: Mutation,
956 rng: std.Random,
957 in: []const u8,
958 out: *MemoryMappedList,
959 corpus: []const []const u8,
960 const_vals2: []const u16,
961 const_vals4: []const u32,
962 const_vals8: []const u64,
963 const_vals16: []const u128,
964 ) bool {
965 const Class = enum { new, remove, rmw, move_span, replicate_splice_span };
966 const class: Class, const class_ctx = switch (mutation) {
967 // zig fmt: off
968 .move_span => .{ .move_span, null },
969 .replicate_splice_span => .{ .replicate_splice_span, null },
970
971 .delete_byte => .{ .remove, .{ .delete, 1 } },
972 .delete_span => .{ .remove, .{ .delete, max_delete_len } },
973
974 .pop_byte => .{ .remove, .{ .pop, 1 } },
975 .pop_span => .{ .remove, .{ .pop, max_delete_len } },
976
977 .set_rng_byte => .{ .new, .{ .set , 1, .rng , .one } },
978 .set_zero_byte => .{ .new, .{ .set , 1, .zero , .one } },
979 .set_rng_span => .{ .new, .{ .set , 1, .rng , .many } },
980 .set_zero_span => .{ .new, .{ .set , 1, .zero , .many } },
981 .set_common_span => .{ .new, .{ .set , 1, .common , .many } },
982 .set_print_span => .{ .new, .{ .set , 1, .print , .many } },
983 .set_existing_span => .{ .new, .{ .set , 2, .existing, .many } },
984 .set_splice_span => .{ .new, .{ .set , 1, .splice , .many } },
985 .set_const_16 => .{ .new, .{ .set , 2, .@"const", const_vals2 } },
986 .set_const_32 => .{ .new, .{ .set , 4, .@"const", const_vals4 } },
987 .set_const_64 => .{ .new, .{ .set , 8, .@"const", const_vals8 } },
988 .set_const_128 => .{ .new, .{ .set , 16, .@"const", const_vals16 } },
989 .set_small_16le => .{ .new, .{ .set , 2, .small , .{ i16, .little } } },
990 .set_small_32le => .{ .new, .{ .set , 4, .small , .{ i32, .little } } },
991 .set_small_64le => .{ .new, .{ .set , 8, .small , .{ i64, .little } } },
992 .set_small_16be => .{ .new, .{ .set , 2, .small , .{ i16, .big } } },
993 .set_small_32be => .{ .new, .{ .set , 4, .small , .{ i32, .big } } },
994 .set_small_64be => .{ .new, .{ .set , 8, .small , .{ i64, .big } } },
995 .set_few_8 => .{ .new, .{ .set , 1, .few , .{ u8 , 3 } } },
996 .set_few_16 => .{ .new, .{ .set , 2, .few , .{ u16, 6 } } },
997 .set_few_32 => .{ .new, .{ .set , 4, .few , .{ u32, 9 } } },
998 .set_few_64 => .{ .new, .{ .set , 8, .few , .{ u64, 12 } } },
999
1000 .insert_rng_byte => .{ .new, .{ .insert, 0, .rng , .one } },
1001 .insert_zero_byte => .{ .new, .{ .insert, 0, .zero , .one } },
1002 .insert_rng_span => .{ .new, .{ .insert, 0, .rng , .many } },
1003 .insert_zero_span => .{ .new, .{ .insert, 0, .zero , .many } },
1004 .insert_print_span => .{ .new, .{ .insert, 0, .print , .many } },
1005 .insert_common_span => .{ .new, .{ .insert, 0, .common , .many } },
1006 .insert_integer => .{ .new, .{ .insert, 0, .integer , .many } },
1007 .insert_wtf8_char => .{ .new, .{ .insert, 0, .wtf8 , .one } },
1008 .insert_wtf8_span => .{ .new, .{ .insert, 0, .wtf8 , .many } },
1009 .insert_existing_span => .{ .new, .{ .insert, 1, .existing, .many } },
1010 .insert_splice_span => .{ .new, .{ .insert, 0, .splice , .many } },
1011 .insert_const_16 => .{ .new, .{ .insert, 0, .@"const", const_vals2 } },
1012 .insert_const_32 => .{ .new, .{ .insert, 0, .@"const", const_vals4 } },
1013 .insert_const_64 => .{ .new, .{ .insert, 0, .@"const", const_vals8 } },
1014 .insert_const_128 => .{ .new, .{ .insert, 0, .@"const", const_vals16 } },
1015 .insert_small_16le => .{ .new, .{ .insert, 0, .small , .{ i16, .little } } },
1016 .insert_small_32le => .{ .new, .{ .insert, 0, .small , .{ i32, .little } } },
1017 .insert_small_64le => .{ .new, .{ .insert, 0, .small , .{ i64, .little } } },
1018 .insert_small_16be => .{ .new, .{ .insert, 0, .small , .{ i16, .big } } },
1019 .insert_small_32be => .{ .new, .{ .insert, 0, .small , .{ i32, .big } } },
1020 .insert_small_64be => .{ .new, .{ .insert, 0, .small , .{ i64, .big } } },
1021 .insert_few_8 => .{ .new, .{ .insert, 0, .few , .{ u8 , 3 } } },
1022 .insert_few_16 => .{ .new, .{ .insert, 0, .few , .{ u16, 6 } } },
1023 .insert_few_32 => .{ .new, .{ .insert, 0, .few , .{ u32, 9 } } },
1024 .insert_few_64 => .{ .new, .{ .insert, 0, .few , .{ u64, 12 } } },
1025
1026 .push_rng_byte => .{ .new, .{ .push , 0, .rng , .one } },
1027 .push_zero_byte => .{ .new, .{ .push , 0, .zero , .one } },
1028 .push_rng_span => .{ .new, .{ .push , 0, .rng , .many } },
1029 .push_zero_span => .{ .new, .{ .push , 0, .zero , .many } },
1030 .push_print_span => .{ .new, .{ .push , 0, .print , .many } },
1031 .push_common_span => .{ .new, .{ .push , 0, .common , .many } },
1032 .push_integer => .{ .new, .{ .push , 0, .integer , .many } },
1033 .push_large_zero_span => .{ .new, .{ .push , 0, .zero , .large } },
1034 .push_wtf8_char => .{ .new, .{ .push , 0, .wtf8 , .one } },
1035 .push_wtf8_span => .{ .new, .{ .push , 0, .wtf8 , .many } },
1036 .push_existing_span => .{ .new, .{ .push , 1, .existing, .many } },
1037 .push_splice_span => .{ .new, .{ .push , 0, .splice , .many } },
1038 .push_const_16 => .{ .new, .{ .push , 0, .@"const", const_vals2 } },
1039 .push_const_32 => .{ .new, .{ .push , 0, .@"const", const_vals4 } },
1040 .push_const_64 => .{ .new, .{ .push , 0, .@"const", const_vals8 } },
1041 .push_const_128 => .{ .new, .{ .push , 0, .@"const", const_vals16 } },
1042 .push_small_16le => .{ .new, .{ .push , 0, .small , .{ i16, .little } } },
1043 .push_small_32le => .{ .new, .{ .push , 0, .small , .{ i32, .little } } },
1044 .push_small_64le => .{ .new, .{ .push , 0, .small , .{ i64, .little } } },
1045 .push_small_16be => .{ .new, .{ .push , 0, .small , .{ i16, .big } } },
1046 .push_small_32be => .{ .new, .{ .push , 0, .small , .{ i32, .big } } },
1047 .push_small_64be => .{ .new, .{ .push , 0, .small , .{ i64, .big } } },
1048 .push_few_8 => .{ .new, .{ .push , 0, .few , .{ u8 , 3 } } },
1049 .push_few_16 => .{ .new, .{ .push , 0, .few , .{ u16, 6 } } },
1050 .push_few_32 => .{ .new, .{ .push , 0, .few , .{ u32, 9 } } },
1051 .push_few_64 => .{ .new, .{ .push , 0, .few , .{ u64, 12 } } },
1052
1053 .xor_1 => .{ .rmw, .{ .xor , u8 , native_endian, 1 } },
1054 .xor_few_8 => .{ .rmw, .{ .xor , u8 , native_endian, 3 } },
1055 .xor_few_16 => .{ .rmw, .{ .xor , u16, native_endian, 6 } },
1056 .xor_few_32 => .{ .rmw, .{ .xor , u32, native_endian, 9 } },
1057 .xor_few_64 => .{ .rmw, .{ .xor , u64, native_endian, 12 } },
1058
1059 .truncate_8 => .{ .rmw, .{ .truncate , u8 , native_endian, {} } },
1060 .truncate_16le => .{ .rmw, .{ .truncate , u16, .little , {} } },
1061 .truncate_32le => .{ .rmw, .{ .truncate , u32, .little , {} } },
1062 .truncate_64le => .{ .rmw, .{ .truncate , u64, .little , {} } },
1063 .truncate_16be => .{ .rmw, .{ .truncate , u16, .big , {} } },
1064 .truncate_32be => .{ .rmw, .{ .truncate , u32, .big , {} } },
1065 .truncate_64be => .{ .rmw, .{ .truncate , u64, .big , {} } },
1066
1067 .add_8 => .{ .rmw, .{ .add , i8 , native_endian, {} } },
1068 .add_16le => .{ .rmw, .{ .add , i16, .little , {} } },
1069 .add_32le => .{ .rmw, .{ .add , i32, .little , {} } },
1070 .add_64le => .{ .rmw, .{ .add , i64, .little , {} } },
1071 .add_16be => .{ .rmw, .{ .add , i16, .big , {} } },
1072 .add_32be => .{ .rmw, .{ .add , i32, .big , {} } },
1073 .add_64be => .{ .rmw, .{ .add , i64, .big , {} } },
1074
1075 .packed_set_rng_8 => .{ .rmw, .{ .packed_rng, u8 , native_endian, {} } },
1076 .packed_set_rng_16le => .{ .rmw, .{ .packed_rng, u16, .little , {} } },
1077 .packed_set_rng_32le => .{ .rmw, .{ .packed_rng, u32, .little , {} } },
1078 .packed_set_rng_64le => .{ .rmw, .{ .packed_rng, u64, .little , {} } },
1079 .packed_set_rng_16be => .{ .rmw, .{ .packed_rng, u16, .big , {} } },
1080 .packed_set_rng_32be => .{ .rmw, .{ .packed_rng, u32, .big , {} } },
1081 .packed_set_rng_64be => .{ .rmw, .{ .packed_rng, u64, .big , {} } },
1082 // zig fmt: on
1083 };
1084
1085 switch (class) {
1086 .new => {
1087 const op: enum {
1088 set,
1089 insert,
1090 push,
1091
1092 pub fn maxLen(comptime op: @This(), in_len: usize) usize {
1093 return switch (op) {
1094 .set => @min(in_len, max_set_len),
1095 .insert, .push => max_insert_len,
1096 };
1097 }
1098 }, const min_in_len, const data: enum {
1099 rng,
1100 zero,
1101 common,
1102 print,
1103 integer,
1104 wtf8,
1105 existing,
1106 splice,
1107 @"const",
1108 small,
1109 few,
1110 }, const data_ctx = class_ctx;
1111 const Size = enum { one, many, large };
1112 if (in.len < min_in_len) return false;
1113 if (data == .@"const" and data_ctx.len == 0) return false;
1114
1115 const splice_i = if (data == .splice) blk: {
1116 // Element zero always holds an empty input, so we do not select it
1117 if (corpus.len == 1) return false;
1118 break :blk rng.intRangeLessThanBiased(usize, 1, corpus.len);
1119 } else undefined;
1120
1121 // Only needs to be followed for set
1122 const len = switch (data) {
1123 else => switch (@as(Size, data_ctx)) {
1124 .one => 1,
1125 .many => rng.intRangeAtMostBiased(usize, 1, op.maxLen(in.len)),
1126 .large => rng.intRangeAtMostBiased(usize, 1, max_large_insert_len),
1127 },
1128 .wtf8 => undefined, // varies by size of each code unit
1129 .splice => rng.intRangeAtMostBiased(usize, 1, @min(
1130 corpus[splice_i].len,
1131 op.maxLen(in.len),
1132 )),
1133 .existing => rng.intRangeAtMostBiased(usize, 1, @min(
1134 in.len,
1135 op.maxLen(in.len),
1136 )),
1137 .@"const" => @sizeOf(@typeInfo(@TypeOf(data_ctx)).pointer.child),
1138 .small, .few => @sizeOf(data_ctx[0]),
1139 };
1140
1141 const i = switch (op) {
1142 .set => rng.uintAtMostBiased(usize, in.len - len),
1143 .insert => rng.uintAtMostBiased(usize, in.len),
1144 .push => in.len,
1145 };
1146
1147 out.appendSliceAssumeCapacity(in[0..i]);
1148 switch (data) {
1149 .rng => {
1150 var bytes: [@max(max_insert_len, max_set_len)]u8 = undefined;
1151 rng.bytes(bytes[0..len]);
1152 out.appendSliceAssumeCapacity(bytes[0..len]);
1153 },
1154 .zero => out.appendNTimesAssumeCapacity(0, len),
1155 .common => for (out.addManyAsSliceAssumeCapacity(len)) |*c| {
1156 c.* = switch (rng.int(u6)) {
1157 0 => ' ',
1158 1...10 => |x| '0' + (@as(u8, x) - 1),
1159 11...36 => |x| 'A' + (@as(u8, x) - 11),
1160 37 => '_',
1161 38...63 => |x| 'a' + (@as(u8, x) - 38),
1162 };
1163 },
1164 .print => for (out.addManyAsSliceAssumeCapacity(len)) |*c| {
1165 c.* = rng.intRangeAtMostBiased(u8, 0x20, 0x7E);
1166 },
1167 .integer => {
1168 const negative = len != 0 and rng.boolean();
1169 if (negative) {
1170 out.appendAssumeCapacity('-');
1171 }
1172
1173 for (out.addManyAsSliceAssumeCapacity(len - @intFromBool(negative))) |*c| {
1174 c.* = rng.intRangeAtMostBiased(u8, '0', '9');
1175 }
1176 },
1177 .wtf8 => {
1178 comptime assert(op != .set);
1179 var codepoints: usize = if (data_ctx == .one)
1180 1
1181 else
1182 rng.intRangeAtMostBiased(usize, 1, Mutation.max_insert_len / 4);
1183
1184 while (true) {
1185 const units1 = rng.int(u2);
1186 const value = switch (units1) {
1187 0 => rng.int(u7),
1188 1 => rng.intRangeAtMostBiased(u11, 0x000080, 0x0007FF),
1189 2 => rng.intRangeAtMostBiased(u16, 0x000800, 0x00FFFF),
1190 3 => rng.intRangeAtMostBiased(u21, 0x010000, 0x10FFFF),
1191 };
1192 const units = @as(u3, units1) + 1;
1193
1194 var buf: [4]u8 = undefined;
1195 assert(std.unicode.wtf8Encode(value, &buf) catch unreachable == units);
1196 out.appendSliceAssumeCapacity(buf[0..units]);
1197
1198 codepoints -= 1;
1199 if (codepoints == 0) break;
1200 }
1201 },
1202 .existing => {
1203 const j = rng.uintAtMostBiased(usize, in.len - len);
1204 out.appendSliceAssumeCapacity(in[j..][0..len]);
1205 },
1206 .splice => {
1207 const j = rng.uintAtMostBiased(usize, corpus[splice_i].len - len);
1208 out.appendSliceAssumeCapacity(corpus[splice_i][j..][0..len]);
1209 },
1210 .@"const" => out.appendSliceAssumeCapacity(@ptrCast(
1211 &data_ctx[rng.uintLessThanBiased(usize, data_ctx.len)],
1212 )),
1213 .small => out.appendSliceAssumeCapacity(@ptrCast(
1214 &mem.nativeTo(data_ctx[0], rng.int(SmallValue), data_ctx[1]),
1215 )),
1216 .few => out.appendSliceAssumeCapacity(@ptrCast(
1217 &fewValue(rng, data_ctx[0], data_ctx[1]),
1218 )),
1219 }
1220 switch (op) {
1221 .set => out.appendSliceAssumeCapacity(in[i + len ..]),
1222 .insert => out.appendSliceAssumeCapacity(in[i..]),
1223 .push => {},
1224 }
1225 },
1226 .remove => {
1227 if (in.len == 0) return false;
1228 const Op = enum { delete, pop };
1229 const op: Op, const max_len = class_ctx;
                // LessThan is used so we don't delete the entire input (which is unproductive
                // since an empty input has always been tried).
1232 const len = if (max_len == 1) 1 else rng.uintLessThanBiased(
1233 usize,
1234 @min(max_len + 1, in.len),
1235 );
1236 switch (op) {
1237 .delete => {
1238 const i = rng.uintAtMostBiased(usize, in.len - len);
1239 out.appendSliceAssumeCapacity(in[0..i]);
1240 out.appendSliceAssumeCapacity(in[i + len ..]);
1241 },
1242 .pop => out.appendSliceAssumeCapacity(in[0 .. in.len - len]),
1243 }
1244 },
1245 .rmw => {
1246 const Op = enum { xor, truncate, add, packed_rng };
1247 const op: Op, const T, const endian, const xor_bits = class_ctx;
1248 if (in.len < @sizeOf(T)) return false;
1249 const Log2T = math.Log2Int(T);
1250
1251 const idx = rng.uintAtMostBiased(usize, in.len - @sizeOf(T));
1252 const old = mem.readInt(T, in[idx..][0..@sizeOf(T)], endian);
1253 const new = switch (op) {
1254 .xor => old ^ fewValue(rng, T, xor_bits),
1255 .truncate => old & (@as(T, math.maxInt(T)) >> rng.int(Log2T)),
1256 .add => old +% addend: {
1257 const val = rng.int(Mutation.AddValue);
1258 break :addend if (val == 0) 1 else val;
1259 },
1260 .packed_rng => blk: {
1261 const bits = rng.int(math.Log2Int(T)) +| 1;
1262 break :blk old ^ (rng.int(T) >> bits << rng.uintAtMostBiased(Log2T, bits));
1263 },
1264 };
1265 out.appendSliceAssumeCapacity(in);
1266 mem.bytesAsValue(T, out.items[8..][idx..][0..@sizeOf(T)]).* =
1267 mem.nativeTo(T, new, endian);
1268 },
1269 .move_span => {
1270 if (in.len < 2) return false;
                // One less since moving the whole input will never change anything.
1272 const len = rng.intRangeAtMostBiased(usize, 1, @min(
1273 in.len - 1,
1274 Mutation.max_set_len,
1275 ));
1276
1277 const src = rng.uintAtMostBiased(usize, in.len - len);
1278 // This indexes into the final input
1279 const dst = blk: {
1280 const res = rng.uintAtMostBiased(usize, in.len - len - 1);
1281 break :blk res + @intFromBool(res >= src);
1282 };
1283
1284 if (src < dst) {
1285 out.appendSliceAssumeCapacity(in[0..src]);
1286 out.appendSliceAssumeCapacity(in[src + len .. dst + len]);
1287 out.appendSliceAssumeCapacity(in[src..][0..len]);
1288 out.appendSliceAssumeCapacity(in[dst + len ..]);
1289 } else {
1290 out.appendSliceAssumeCapacity(in[0..dst]);
1291 out.appendSliceAssumeCapacity(in[src..][0..len]);
1292 out.appendSliceAssumeCapacity(in[dst..src]);
1293 out.appendSliceAssumeCapacity(in[src + len ..]);
1294 }
1295 },
1296 .replicate_splice_span => {
1297 if (in.len == 0) return false;
1298 if (corpus.len == 1) return false;
1299 const from = corpus[rng.intRangeLessThanBiased(usize, 1, corpus.len)];
1300 const len = rng.uintLessThanBiased(usize, @min(in.len, from.len, max_replicate_len));
1301 const i = rng.uintAtMostBiased(usize, @min(in.len, from.len) - len);
1302 out.appendSliceAssumeCapacity(in[0..i]);
1303 out.appendSliceAssumeCapacity(from[i..][0..len]);
1304 out.appendSliceAssumeCapacity(in[i + len ..]);
1305 },
1306 }
1307 return true;
1308 }
1309};
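
// Hedged sanity check for `Mutation.fewValue` (illustrative only): the result always has at
// least one and at most `bits` bits set.
test "Mutation.fewValue bit count (illustrative)" {
    var prng: std.Random.DefaultPrng = .init(42);
    const rng = prng.random();
    for (0..100) |_| {
        const v = Mutation.fewValue(rng, u16, 6);
        try std.testing.expect(v != 0);
        try std.testing.expect(@popCount(v) <= 6);
    }
}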
1310
1311/// Like `std.ArrayList(u8)` but backed by memory mapping.
1312pub const MemoryMappedList = struct {
1313 /// Contents of the list.
1314 ///
    /// Pointers to elements in this slice are invalidated by various functions
    /// of this list in accordance with the respective documentation. In all
    /// cases, "invalidated" means that the underlying mapping has been remapped
    /// or unmapped.
1319 items: []align(std.heap.page_size_min) volatile u8,
1320 /// How many bytes this list can hold without allocating additional memory.
1321 capacity: usize,
1322 /// The file is kept open so that it can be resized.
1323 file: std.fs.File,
1324
1325 pub fn init(file: std.fs.File, length: usize, capacity: usize) !MemoryMappedList {
1326 const ptr = try std.posix.mmap(
1327 null,
1328 capacity,
1329 std.posix.PROT.READ | std.posix.PROT.WRITE,
1330 .{ .TYPE = .SHARED },
1331 file.handle,
1332 0,
1333 );
1334 return .{
1335 .file = file,
1336 .items = ptr[0..length],
1337 .capacity = capacity,
1338 };
1339 }
1340
1341 pub fn create(file: std.fs.File, length: usize, capacity: usize) !MemoryMappedList {
1342 try file.setEndPos(capacity);
1343 return init(file, length, capacity);
1344 }
1345
1346 pub fn deinit(l: *MemoryMappedList) void {
1347 l.file.close();
1348 std.posix.munmap(@volatileCast(l.items.ptr[0..l.capacity]));
1349 l.* = undefined;
1350 }
1351
1352 /// Modify the array so that it can hold at least `additional_count` **more** items.
1353 /// Invalidates element pointers if additional memory is needed.
1354 pub fn ensureUnusedCapacity(l: *MemoryMappedList, additional_count: usize) !void {
1355 return l.ensureTotalCapacity(l.items.len + additional_count);
1356 }
1357
1358 /// If the current capacity is less than `new_capacity`, this function will
1359 /// modify the array so that it can hold at least `new_capacity` items.
1360 /// Invalidates element pointers if additional memory is needed.
1361 pub fn ensureTotalCapacity(l: *MemoryMappedList, new_capacity: usize) !void {
1362 if (l.capacity >= new_capacity) return;
1363
1364 const better_capacity = growCapacity(l.capacity, new_capacity);
1365 return l.ensureTotalCapacityPrecise(better_capacity);
1366 }
1367
1368 pub fn ensureTotalCapacityPrecise(l: *MemoryMappedList, new_capacity: usize) !void {
1369 if (l.capacity >= new_capacity) return;
1370
1371 std.posix.munmap(@volatileCast(l.items.ptr[0..l.capacity]));
1372 try l.file.setEndPos(new_capacity);
1373 l.* = try init(l.file, l.items.len, new_capacity);
1374 }
1375
1376 /// Invalidates all element pointers.
1377 pub fn clearRetainingCapacity(l: *MemoryMappedList) void {
1378 l.items.len = 0;
1379 }
1380
1381 /// Append the slice of items to the list.
1382 /// Asserts that the list can hold the additional items.
1383 pub fn appendSliceAssumeCapacity(l: *MemoryMappedList, items: []const u8) void {
1384 const old_len = l.items.len;
1385 const new_len = old_len + items.len;
1386 assert(new_len <= l.capacity);
1387 l.items.len = new_len;
1388 @memcpy(l.items[old_len..][0..items.len], items);
1389 }
1390
1391 /// Extends the list by 1 element.
1392 /// Never invalidates element pointers.
1393 /// Asserts that the list can hold one additional item.
1394 pub fn appendAssumeCapacity(l: *MemoryMappedList, item: u8) void {
1395 const new_item_ptr = l.addOneAssumeCapacity();
1396 new_item_ptr.* = item;
1397 }
1398
1399 /// Increase length by 1, returning pointer to the new item.
1400 /// The returned pointer becomes invalid when the list is resized.
1401 /// Never invalidates element pointers.
1402 /// Asserts that the list can hold one additional item.
1403 pub fn addOneAssumeCapacity(l: *MemoryMappedList) *volatile u8 {
1404 assert(l.items.len < l.capacity);
1405 l.items.len += 1;
1406 return &l.items[l.items.len - 1];
1407 }
1408
1409 /// Append a value to the list `n` times.
1410 /// Never invalidates element pointers.
1411 /// The function is inline so that a comptime-known `value` parameter will
1412 /// have better memset codegen in case it has a repeated byte pattern.
1413 /// Asserts that the list can hold the additional items.
1414 pub inline fn appendNTimesAssumeCapacity(l: *MemoryMappedList, value: u8, n: usize) void {
1415 const new_len = l.items.len + n;
1416 assert(new_len <= l.capacity);
1417 @memset(l.items.ptr[l.items.len..new_len], value);
1418 l.items.len = new_len;
1419 }
1420
1421 /// Resize the array, adding `n` new elements, which have `undefined` values.
1422 /// The return value is a slice pointing to the newly allocated elements.
1423 /// Never invalidates element pointers.
1424 /// The returned pointer becomes invalid when the list is resized.
1425 /// Asserts that the list can hold the additional items.
1426 pub fn addManyAsSliceAssumeCapacity(l: *MemoryMappedList, n: usize) []volatile u8 {
1427 assert(l.items.len + n <= l.capacity);
1428 const prev_len = l.items.len;
1429 l.items.len += n;
1430 return l.items[prev_len..][0..n];
1431 }
1432
1433 /// Called when memory growth is necessary. Returns a capacity larger than
1434 /// minimum that grows super-linearly.
1435 fn growCapacity(current: usize, minimum: usize) usize {
1436 var new = current;
1437 while (true) {
1438 new = mem.alignForward(usize, new + new / 2, std.heap.page_size_max);
1439 if (new >= minimum) return new;
1440 }
1441 }
1442
1443 pub fn insertAssumeCapacity(l: *MemoryMappedList, i: usize, item: u8) void {
1444 assert(l.items.len + 1 <= l.capacity);
1445 l.items.len += 1;
1446 volatileCopyBackwards(u8, l.items[i + 1 ..], l.items[i .. l.items.len - 1]);
1447 l.items[i] = item;
1448 }
1449
1450 pub fn orderedRemove(l: *MemoryMappedList, i: usize) u8 {
        assert(i < l.items.len);
1452 const old = l.items[i];
1453 volatileCopyForwards(u8, l.items[i .. l.items.len - 1], l.items[i + 1 ..]);
1454 l.items.len -= 1;
1455 return old;
1456 }
1457};