   1//! Allocates statically ~224K (128K lookup, 96K tokens).
   2//!
   3//! The source of an `error.WriteFailed` is always the backing writer. After an
   4//! `error.WriteFailed`, the `.writer` becomes `.failing` and is unrecoverable.
   5//! After a `flush`, the writer also becomes `.failing` since the stream has
   6//! been finished. This behavior also applies to `Raw` and `Huffman`.
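    //!
    //! Minimal usage sketch (illustrative; `out`, `data`, and the buffer sizing below are
    //! assumptions rather than part of this file):
    //!
    //!     var window: [flate.max_window_len * 2]u8 = undefined;
    //!     var compress = try Compress.init(out, &window, .gzip, .default);
    //!     try compress.writer.writeAll(data);
    //!     try compress.writer.flush(); // finishes the stream; `.writer` becomes `.failing`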
   7
   8// Implementation details:
   9//   A chained hash table is used to find matches. `drain` always preserves `flate.history_len`
  10//   bytes to use as a history and avoids tokenizing the final bytes since they can be part of
  11//   a longer match with unwritten bytes (unless it is a `flush`). The minimum match searched
  12//   for is of length `seq_bytes`. If a match is made, a longer match is also checked for at
  13//   the next byte (lazy matching) if the last match does not meet the `Options.lazy` threshold.
  14//
  15//   Up to `block_tokens` tokens are accumulated in `buffered_tokens` and are output in
  16//   `writeBlock`, which determines the optimal block type and frequencies.
  17
  18const builtin = @import("builtin");
  19const std = @import("std");
  20const mem = std.mem;
  21const math = std.math;
  22const assert = std.debug.assert;
  23const Io = std.Io;
  24const Writer = Io.Writer;
  25
  26const Compress = @This();
  27const token = @import("token.zig");
  28const flate = @import("../flate.zig");
  29
  30/// Until #104 is implemented, a ?u15 takes 4 bytes, which is unacceptable
  31/// as it doubles the size of this already massive structure.
  32///
  33/// Also, there are no `to` / `from` methods because LLVM 21 does not
  34/// optimize away the conversion from and to `?u15`.
  35const PackedOptionalU15 = packed struct(u16) {
  36    value: u15,
  37    is_null: bool,
  38
  39    pub fn int(p: PackedOptionalU15) u16 {
  40        return @bitCast(p);
  41    }
  42
  43    pub const null_bit: PackedOptionalU15 = .{ .value = 0, .is_null = true };
  44};
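    // Layout note (illustrative): `value` occupies bits 0..14 and `is_null` is bit 15, so
    // `null_bit.int() == 0x8000`; `matchAndAddHash` relies on this equaling `flate.history_len`.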
  45
   46/// After `flush` is called, all vtable calls will result in `error.WriteFailed`.
  47writer: Writer,
  48has_history: bool,
  49bit_writer: BitWriter,
  50buffered_tokens: struct {
  51    /// List of `TokenBufferEntryHeader`s and their trailing data.
  52    list: [@as(usize, block_tokens) * 3]u8,
  53    pos: u32,
  54    n: u16,
  55    lit_freqs: [286]u16,
  56    dist_freqs: [30]u16,
  57
  58    pub const empty: @This() = .{
  59        .list = undefined,
  60        .pos = 0,
  61        .n = 0,
  62        .lit_freqs = @splat(0),
  63        .dist_freqs = @splat(0),
  64    };
  65},
  66lookup: struct {
   67    /// Indexes are the hashes of `seq_bytes`-long byte sequences.
   68    ///
   69    /// Values are the positions in `chain` of the previous sequence with the same hash.
  70    head: [1 << lookup_hash_bits]PackedOptionalU15,
  71    /// Values are the non-zero number of bytes backwards in the history with the same hash.
  72    ///
  73    /// The relationship of chain indexes and bytes relative to the latest history byte is
  74    /// `chain_pos -% chain_index = history_index`.
  75    chain: [32768]PackedOptionalU15,
   76    /// The index in `chain` corresponding to the newest byte of the history.
  77    chain_pos: u15,
  78},
  79container: flate.Container,
  80hasher: flate.Container.Hasher,
  81opts: Options,
  82
  83const BitWriter = struct {
  84    output: *Writer,
  85    buffered: u7,
  86    buffered_n: u3,
  87
  88    pub fn init(w: *Writer) BitWriter {
  89        return .{
  90            .output = w,
  91            .buffered = 0,
  92            .buffered_n = 0,
  93        };
  94    }
  95
  96    /// Asserts `bits` is zero-extended
  97    pub fn write(b: *BitWriter, bits: u56, n: u6) Writer.Error!void {
  98        assert(@as(u8, b.buffered) >> b.buffered_n == 0);
  99        assert(@as(u57, bits) >> n == 0); // n may be 56 so u57 is needed
 100        const combined = @shlExact(@as(u64, bits), b.buffered_n) | b.buffered;
 101        const combined_bits = @as(u6, b.buffered_n) + n;
 102
 103        const out = try b.output.writableSliceGreedy(8);
 104        mem.writeInt(u64, out[0..8], combined, .little);
 105        b.output.advance(combined_bits / 8);
 106
 107        b.buffered_n = @truncate(combined_bits);
 108        b.buffered = @intCast(combined >> (combined_bits - b.buffered_n));
 109    }
 110
  111    /// Asserts one byte can be written to `b.output` without rebasing.
 112    pub fn byteAlign(b: *BitWriter) void {
 113        b.output.unusedCapacitySlice()[0] = b.buffered;
 114        b.output.advance(@intFromBool(b.buffered_n != 0));
 115        b.buffered = 0;
 116        b.buffered_n = 0;
 117    }
 118
 119    pub fn writeClen(
 120        b: *BitWriter,
 121        hclen: u4,
 122        clen_values: []u8,
 123        clen_extra: []u8,
 124        clen_codes: [19]u16,
 125        clen_bits: [19]u4,
 126    ) Writer.Error!void {
  127        // Write the first four clen entries separately since they are always present,
 128        // and writing them all at once takes too many bits.
 129        try b.write(clen_bits[token.codegen_order[0]] |
 130            @shlExact(@as(u6, clen_bits[token.codegen_order[1]]), 3) |
 131            @shlExact(@as(u9, clen_bits[token.codegen_order[2]]), 6) |
 132            @shlExact(@as(u12, clen_bits[token.codegen_order[3]]), 9), 12);
 133
 134        var i = hclen;
 135        var clen_bits_table: u45 = 0;
 136        while (i != 0) {
 137            i -= 1;
 138            clen_bits_table <<= 3;
 139            clen_bits_table |= clen_bits[token.codegen_order[4..][i]];
 140        }
 141        try b.write(clen_bits_table, @as(u6, hclen) * 3);
 142
 143        for (clen_values, clen_extra) |value, extra| {
 144            try b.write(
 145                clen_codes[value] | @shlExact(@as(u16, extra), clen_bits[value]),
 146                clen_bits[value] + @as(u3, switch (value) {
 147                    0...15 => 0,
 148                    16 => 2,
 149                    17 => 3,
 150                    18 => 7,
 151                    else => unreachable,
 152                }),
 153            );
 154        }
 155    }
 156};
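    // Accumulation example for `write` (illustrative): `write(0b101, 3)` followed by
    // `write(0b01, 2)` leaves the pending bits 0b01101, i.e. earlier writes occupy the less
    // significant bits, and completed bytes are flushed to `output` as they fill up.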
 157
  158/// Number of tokens to accumulate before outputting as a block.
 159/// The maximum value is `math.maxInt(u16) - 1` since one token is reserved for end-of-block.
 160const block_tokens: u16 = 1 << 15;
 161const lookup_hash_bits = 15;
 162const Hash = u16; // `u[lookup_hash_bits]` is not used due to worse optimization (with LLVM 21)
 163const seq_bytes = 3; // not intended to be changed
 164const Seq = std.meta.Int(.unsigned, seq_bytes * 8);
 165
 166const TokenBufferEntryHeader = packed struct(u16) {
 167    kind: enum(u1) {
 168        /// Followed by non-zero `data` byte literals.
 169        bytes,
 170        /// Followed by the length as a byte
 171        match,
 172    },
 173    data: u15,
 174};
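    // For example (illustrative): a run of three literals occupies 2 + 3 bytes in the token
    // buffer (a `.bytes` header with `data = 3` followed by the literal bytes), while a match
    // occupies 2 + 1 bytes (a `.match` header carrying the distance followed by one length byte).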
 175
 176const BlockHeader = packed struct(u3) {
 177    final: bool,
 178    kind: enum(u2) { stored, fixed, dynamic, _ },
 179
 180    pub fn int(h: BlockHeader) u3 {
 181        return @bitCast(h);
 182    }
 183
 184    pub const Dynamic = packed struct(u17) {
 185        regular: BlockHeader,
 186        hlit: u5,
 187        hdist: u5,
 188        hclen: u4,
 189
 190        pub fn int(h: Dynamic) u17 {
 191            return @bitCast(h);
 192        }
 193    };
 194};
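    // This mirrors the RFC 1951 block header: the first bit is BFINAL and the next two are
    // BTYPE (00 = stored, 01 = fixed Huffman, 10 = dynamic Huffman, 11 = reserved).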
 195
 196fn outputMatch(c: *Compress, dist: u15, len: u8) Writer.Error!void {
 197    // This must come first. Instead of ensuring a full block is never left buffered,
  198    // draining it is deferred to allow end of stream to be indicated.
 199    if (c.buffered_tokens.n == block_tokens) {
 200        @branchHint(.unlikely); // LLVM 21 optimizes this branch as the more likely without
 201        try c.writeBlock(false);
 202    }
 203    const header: TokenBufferEntryHeader = .{ .kind = .match, .data = dist };
 204    c.buffered_tokens.list[c.buffered_tokens.pos..][0..2].* = @bitCast(header);
 205    c.buffered_tokens.list[c.buffered_tokens.pos + 2] = len;
 206    c.buffered_tokens.pos += 3;
 207    c.buffered_tokens.n += 1;
 208
 209    c.buffered_tokens.lit_freqs[@as(usize, 257) + token.LenCode.fromVal(len).toInt()] += 1;
 210    c.buffered_tokens.dist_freqs[token.DistCode.fromVal(dist).toInt()] += 1;
 211}
 212
 213fn outputBytes(c: *Compress, bytes: []const u8) Writer.Error!void {
 214    var remaining = bytes;
 215    while (remaining.len != 0) {
 216        if (c.buffered_tokens.n == block_tokens) {
 217            @branchHint(.unlikely); // LLVM 21 optimizes this branch as the more likely without
 218            try c.writeBlock(false);
 219        }
 220
 221        const n = @min(remaining.len, block_tokens - c.buffered_tokens.n, math.maxInt(u15));
 222        assert(n != 0);
 223        const header: TokenBufferEntryHeader = .{ .kind = .bytes, .data = n };
 224        c.buffered_tokens.list[c.buffered_tokens.pos..][0..2].* = @bitCast(header);
 225        @memcpy(c.buffered_tokens.list[c.buffered_tokens.pos + 2 ..][0..n], remaining[0..n]);
 226        c.buffered_tokens.pos += @as(u32, 2) + n;
 227        c.buffered_tokens.n += n;
 228
 229        for (remaining[0..n]) |b| {
 230            c.buffered_tokens.lit_freqs[b] += 1;
 231        }
 232        remaining = remaining[n..];
 233    }
 234}
 235
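    // Multiplicative (Fibonacci) hashing: 0x9E3779B1 is roughly 2^32 divided by the golden
    // ratio, and only the top `lookup_hash_bits` bits of the product are kept.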
 236fn hash(x: u32) Hash {
 237    return @intCast((x *% 0x9E3779B1) >> (32 - lookup_hash_bits));
 238}
 239
 240/// Trades between speed and compression size.
 241///
  242/// Default parameters are [taken from zlib]
 243/// (https://github.com/madler/zlib/blob/v1.3.1/deflate.c#L112)
 244pub const Options = struct {
  245    /// Perform fewer lookups when a match of at least this length has been found.
 246    good: u16,
 247    /// Stop when a match of at least this length has been found.
 248    nice: u16,
 249    /// Don't attempt a lazy match find when a match of at least this length has been found.
 250    lazy: u16,
 251    /// Check this many previous locations with the same hash for longer matches.
 252    chain: u16,
 253
 254    // zig fmt: off
 255    pub const level_1: Options = .{ .good =  4, .nice =   8, .lazy =   0, .chain =    4 };
 256    pub const level_2: Options = .{ .good =  4, .nice =  16, .lazy =   0, .chain =    8 };
 257    pub const level_3: Options = .{ .good =  4, .nice =  32, .lazy =   0, .chain =   32 };
 258    pub const level_4: Options = .{ .good =  4, .nice =  16, .lazy =   4, .chain =   16 };
 259    pub const level_5: Options = .{ .good =  8, .nice =  32, .lazy =  16, .chain =   32 };
 260    pub const level_6: Options = .{ .good =  8, .nice = 128, .lazy =  16, .chain =  128 };
 261    pub const level_7: Options = .{ .good =  8, .nice = 128, .lazy =  32, .chain =  256 };
 262    pub const level_8: Options = .{ .good = 32, .nice = 258, .lazy = 128, .chain = 1024 };
 263    pub const level_9: Options = .{ .good = 32, .nice = 258, .lazy = 258, .chain = 4096 };
 264    // zig fmt: on
 265    pub const fastest = level_1;
 266    pub const default = level_6;
 267    pub const best = level_9;
 268};
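    // For example (illustrative): `.fastest` (`level_1`) checks at most 4 chain entries and
    // never lazy-matches, while `.best` (`level_9`) checks up to 4096 entries and keeps
    // lazy-matching until a maximum-length (258) match is found.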
 269
  270/// It is asserted `buffer` is at least `flate.max_window_len` bytes.
  271/// It is asserted `output` has a capacity of more than 8 bytes.
 272pub fn init(
 273    output: *Writer,
 274    buffer: []u8,
 275    container: flate.Container,
 276    opts: Options,
 277) Writer.Error!Compress {
 278    assert(output.buffer.len > 8);
 279    assert(buffer.len >= flate.max_window_len);
 280
  281    // Note that disallowing some of these simplifies the matching logic.
  282    assert(opts.chain != 0); // no matching would be done; use `Huffman` instead
 283    assert(opts.good >= 3 and opts.nice >= 3); // a match will (usually) not be found
 284    assert(opts.good <= 258 and opts.nice <= 258); // a longer match will not be found
 285    assert(opts.lazy <= opts.nice); // a longer match will (usually) not be found
  286    if (opts.good <= opts.lazy) assert(opts.chain >= 1 << 2); // otherwise `chain` could be reduced to zero
 287
 288    try output.writeAll(container.header());
 289    return .{
 290        .writer = .{
 291            .buffer = buffer,
 292            .vtable = &.{
 293                .drain = drain,
 294                .flush = flush,
 295                .rebase = rebase,
 296            },
 297        },
 298        .has_history = false,
 299        .bit_writer = .init(output),
 300        .buffered_tokens = .empty,
 301        .lookup = .{
  302            // init `value` is max so the initializer is an all-0xff byte pattern
 303            .head = @splat(.{ .value = math.maxInt(u15), .is_null = true }),
 304            .chain = undefined,
 305            .chain_pos = math.maxInt(u15),
 306        },
 307        .container = container,
 308        .opts = opts,
 309        .hasher = .init(container),
 310    };
 311}
 312
 313fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
 314    errdefer w.* = .failing;
  315    // There may not have been enough space in the buffer, so the write was sent directly here.
 316    // However, it is required that all data goes through the buffer to keep a history.
 317    //
  318    // Additionally, keeping the buffer completely full ensures a full history is always
  319    // available afterwards.
 320    const data_n = w.buffer.len - w.end;
 321    _ = w.fixedDrain(data, splat) catch {};
 322    assert(w.end == w.buffer.len);
 323    try rebaseInner(w, 0, 1, false);
 324    return data_n;
 325}
 326
 327fn flush(w: *Writer) Writer.Error!void {
 328    defer w.* = .failing;
 329    const c: *Compress = @fieldParentPtr("writer", w);
 330    try rebaseInner(w, 0, w.buffer.len - flate.history_len, true);
 331    try c.bit_writer.output.rebase(0, 1);
 332    c.bit_writer.byteAlign();
 333    try c.hasher.writeFooter(c.bit_writer.output);
 334}
 335
 336fn rebase(w: *Writer, preserve: usize, capacity: usize) Writer.Error!void {
 337    return rebaseInner(w, preserve, capacity, false);
 338}
 339
 340pub const rebase_min_preserve = flate.history_len;
 341pub const rebase_reserved_capacity = (token.max_length + 1) + seq_bytes;
 342
 343fn rebaseInner(w: *Writer, preserve: usize, capacity: usize, eos: bool) Writer.Error!void {
 344    if (!eos) {
 345        assert(@max(preserve, rebase_min_preserve) + (capacity + rebase_reserved_capacity) <= w.buffer.len);
  346        assert(w.end >= flate.history_len + rebase_reserved_capacity); // Follows from the
  347        // assert above, since rebase is only called when `capacity` is not already present.
  348        // This assertion is important because a full history is required at the end.
 349    } else {
 350        assert(preserve == 0 and capacity == w.buffer.len - flate.history_len);
 351    }
 352
 353    const c: *Compress = @fieldParentPtr("writer", w);
 354    const buffered = w.buffered();
 355
 356    const start = @as(usize, flate.history_len) * @intFromBool(c.has_history);
 357    const lit_end: usize = if (!eos)
 358        buffered.len - rebase_reserved_capacity - (preserve -| flate.history_len)
 359    else
 360        buffered.len -| (seq_bytes - 1);
 361
 362    var i = start;
 363    var last_unmatched = i;
 364    // Read from `w.buffer` instead of `buffered` since the latter may not
 365    // have enough bytes. If this is the case, this variable is not used.
 366    var seq: Seq = mem.readInt(
 367        std.meta.Int(.unsigned, (seq_bytes - 1) * 8),
 368        w.buffer[i..][0 .. seq_bytes - 1],
 369        .big,
 370    );
 371    if (buffered[i..].len < seq_bytes - 1) {
 372        @branchHint(.unlikely);
 373        assert(eos);
 374        seq = undefined;
 375        assert(i >= lit_end);
 376    }
 377
 378    while (i < lit_end) {
 379        var match_start = i;
 380        seq <<= 8;
 381        seq |= buffered[i + (seq_bytes - 1)];
 382        var match = c.matchAndAddHash(i, hash(seq), token.min_length - 1, c.opts.chain, c.opts.good);
 383        i += 1;
 384        if (match.len < token.min_length) continue;
 385
 386        var match_unadded = match.len - 1;
 387        lazy: {
 388            if (match.len >= c.opts.lazy) break :lazy;
 389            if (match.len >= c.writer.buffered()[i..].len) {
 390                @branchHint(.unlikely); // Only end of stream
 391                break :lazy;
 392            }
 393
 394            var chain = c.opts.chain;
 395            var good = c.opts.good;
 396            if (match.len >= good) {
 397                chain >>= 2;
 398                good = math.maxInt(u8); // Reduce only once
 399            }
 400
 401            seq <<= 8;
 402            seq |= buffered[i + (seq_bytes - 1)];
 403            const lazy = c.matchAndAddHash(i, hash(seq), match.len, chain, good);
 404            match_unadded -= 1;
 405            i += 1;
 406
 407            if (lazy.len > match.len) {
 408                match_start += 1;
 409                match = lazy;
 410                match_unadded = match.len - 1;
 411            }
 412        }
 413
 414        assert(i + match_unadded == match_start + match.len);
 415        assert(mem.eql(
 416            u8,
 417            buffered[match_start..][0..match.len],
 418            buffered[match_start - 1 - match.dist ..][0..match.len],
 419        )); // This assert also seems to help codegen.
 420
 421        try c.outputBytes(buffered[last_unmatched..match_start]);
 422        try c.outputMatch(@intCast(match.dist), @intCast(match.len - 3));
 423
 424        last_unmatched = match_start + match.len;
 425        if (last_unmatched + seq_bytes >= w.end) {
 426            @branchHint(.unlikely);
 427            assert(eos);
 428            i = undefined;
 429            break;
 430        }
 431
 432        while (true) {
 433            seq <<= 8;
 434            seq |= buffered[i + (seq_bytes - 1)];
 435            _ = c.addHash(i, hash(seq));
 436            i += 1;
 437
 438            match_unadded -= 1;
 439            if (match_unadded == 0) break;
 440        }
 441        assert(i == match_start + match.len);
 442    }
 443
 444    if (eos) {
 445        i = undefined; // (from match hashing logic)
 446        try c.outputBytes(buffered[last_unmatched..]);
 447        c.hasher.update(buffered[start..]);
 448        try c.writeBlock(true);
 449        return;
 450    }
 451
 452    try c.outputBytes(buffered[last_unmatched..i]);
 453    c.hasher.update(buffered[start..i]);
 454
 455    const preserved = buffered[i - flate.history_len ..];
 456    assert(preserved.len > @max(rebase_min_preserve, preserve));
 457    @memmove(w.buffer[0..preserved.len], preserved);
 458    w.end = preserved.len;
 459    c.has_history = true;
 460}
 461
 462fn addHash(c: *Compress, i: usize, h: Hash) void {
 463    assert(h == hash(mem.readInt(Seq, c.writer.buffer[i..][0..seq_bytes], .big)));
 464
 465    const l = &c.lookup;
 466    l.chain_pos +%= 1;
 467
  468    // Equivalent to the below, however LLVM 21 does not optimize `@subWithOverflow` well at all.
 469    // const replaced_i, const no_replace = @subWithOverflow(i, flate.history_len);
 470    // if (no_replace == 0) {
 471    if (i >= flate.history_len) {
 472        @branchHint(.likely);
 473        const replaced_i = i - flate.history_len;
  474        // The following is the same as the commented line below except it uses a 32-bit load to help optimizations.
 475        // const replaced_seq = mem.readInt(Seq, c.writer.buffer[replaced_i..][0..seq_bytes], .big);
 476        comptime assert(@sizeOf(Seq) <= @sizeOf(u32));
 477        const replaced_u32 = mem.readInt(u32, c.writer.buffered()[replaced_i..][0..4], .big);
 478        const replaced_seq: Seq = @intCast(replaced_u32 >> (32 - @bitSizeOf(Seq)));
 479
 480        const replaced_h = hash(replaced_seq);
  481        // The following is equivalent to the commented code below, which LLVM 21 doesn't optimize well.
 482        // l.head[replaced_h].is_null = l.head[replaced_h].is_null or
 483        //     l.head[replaced_h].int() == l.chain_pos;
 484        const empty_head = l.head[replaced_h].int() == l.chain_pos;
 485        const null_flag = PackedOptionalU15.int(.{ .is_null = empty_head, .value = 0 });
 486        l.head[replaced_h] = @bitCast(l.head[replaced_h].int() | null_flag);
 487    }
 488
 489    const prev_chain_index = l.head[h];
 490    l.chain[l.chain_pos] = @bitCast((l.chain_pos -% prev_chain_index.value) |
 491        (prev_chain_index.int() & PackedOptionalU15.null_bit.int())); // Preserves null
 492    l.head[h] = .{ .value = l.chain_pos, .is_null = false };
 493}
 494
 495/// If the match is shorter, the returned value can be any value `<= old`.
 496fn betterMatchLen(old: u16, prev: []const u8, bytes: []const u8) u16 {
 497    assert(old < @min(bytes.len, token.max_length));
 498    assert(prev.len >= bytes.len);
 499    assert(bytes.len >= token.min_length);
 500
 501    var i: u16 = 0;
 502    const Block = std.meta.Int(.unsigned, @min(math.divCeil(
 503        comptime_int,
 504        math.ceilPowerOfTwoAssert(usize, @bitSizeOf(usize)),
 505        8,
 506    ) catch unreachable, 256) * 8);
 507
 508    if (bytes.len < token.max_length) {
 509        @branchHint(.unlikely); // Only end of stream
 510
 511        while (bytes[i..].len >= @sizeOf(Block)) {
 512            const a = mem.readInt(Block, prev[i..][0..@sizeOf(Block)], .little);
 513            const b = mem.readInt(Block, bytes[i..][0..@sizeOf(Block)], .little);
 514            const diff = a ^ b;
 515            if (diff != 0) {
 516                @branchHint(.likely);
 517                i += @ctz(diff) / 8;
 518                return i;
 519            }
 520            i += @sizeOf(Block);
 521        }
 522
 523        while (i != bytes.len and prev[i] == bytes[i]) {
 524            i += 1;
 525        }
 526        assert(i < token.max_length);
 527        return i;
 528    }
 529
 530    if (old >= @sizeOf(Block)) {
 531        // Check that a longer end is present, otherwise the match is always worse
 532        const a = mem.readInt(Block, prev[old + 1 - @sizeOf(Block) ..][0..@sizeOf(Block)], .little);
 533        const b = mem.readInt(Block, bytes[old + 1 - @sizeOf(Block) ..][0..@sizeOf(Block)], .little);
 534        if (a != b) return i;
 535    }
 536
 537    while (true) {
 538        const a = mem.readInt(Block, prev[i..][0..@sizeOf(Block)], .little);
 539        const b = mem.readInt(Block, bytes[i..][0..@sizeOf(Block)], .little);
 540        const diff = a ^ b;
 541        if (diff != 0) {
 542            i += @ctz(diff) / 8;
 543            return i;
 544        }
 545        i += @sizeOf(Block);
 546        if (i == 256) break;
 547    }
 548
 549    const a = mem.readInt(u16, prev[i..][0..2], .little);
 550    const b = mem.readInt(u16, bytes[i..][0..2], .little);
 551    const diff = a ^ b;
 552    i += @ctz(diff) / 8;
 553    assert(i <= token.max_length);
 554    return i;
 555}
 556
 557test betterMatchLen {
 558    try std.testing.fuzz({}, testFuzzedMatchLen, .{});
 559}
 560
 561fn testFuzzedMatchLen(_: void, input: []const u8) !void {
 562    @disableInstrumentation();
 563    var r: Io.Reader = .fixed(input);
 564    var buf: [1024]u8 = undefined;
 565    var w: Writer = .fixed(&buf);
 566    var old = r.takeLeb128(u9) catch 0;
 567    var bytes_off = @max(1, r.takeLeb128(u10) catch 258);
 568    const prev_back = @max(1, r.takeLeb128(u10) catch 258);
 569
 570    while (r.takeByte()) |byte| {
 571        const op: packed struct(u8) {
 572            kind: enum(u2) { splat, copy, insert_imm, insert },
 573            imm: u6,
 574
 575            pub fn immOrByte(op_s: @This(), r_s: *Io.Reader) usize {
 576                return if (op_s.imm == 0) op_s.imm else @as(usize, r_s.takeByte() catch 0) + 64;
 577            }
 578        } = @bitCast(byte);
 579        (switch (op.kind) {
 580            .splat => w.splatByteAll(r.takeByte() catch 0, op.immOrByte(&r)),
 581            .copy => write: {
 582                const start = w.buffered().len -| op.immOrByte(&r);
 583                const len = @min(w.buffered().len - start, r.takeByte() catch 3);
 584                break :write w.writeAll(w.buffered()[start..][0..len]);
 585            },
 586            .insert_imm => w.writeByte(op.imm),
 587            .insert => w.writeAll(r.take(
 588                @min(r.bufferedLen(), @as(usize, op.imm) + 1),
 589            ) catch unreachable),
 590        }) catch break;
 591    } else |_| {}
 592
 593    w.splatByteAll(0, (1 + 3) -| w.buffered().len) catch unreachable;
 594    bytes_off = @min(bytes_off, @as(u10, @intCast(w.buffered().len - 3)));
 595    const prev_off = bytes_off -| prev_back;
 596    assert(prev_off < bytes_off);
 597    const prev = w.buffered()[prev_off..];
 598    const bytes = w.buffered()[bytes_off..];
 599    old = @min(old, bytes.len - 1, token.max_length - 1);
 600
  601    const diff_index = mem.indexOfDiff(u8, prev, bytes).?; // unwrap since the lengths are not the same
 602    const expected_len = @min(diff_index, 258);
 603    errdefer std.debug.print(
 604        \\prev : '{any}'
 605        \\bytes: '{any}'
 606        \\old     : {}
 607        \\expected: {?}
 608        \\actual  : {}
 609    ++ "\n", .{
 610        prev,                                           bytes,                            old,
 611        if (old < expected_len) expected_len else null, betterMatchLen(old, prev, bytes),
 612    });
 613    if (old < expected_len) {
 614        try std.testing.expectEqual(expected_len, betterMatchLen(old, prev, bytes));
 615    } else {
 616        try std.testing.expect(betterMatchLen(old, prev, bytes) <= old);
 617    }
 618}
 619
 620fn matchAndAddHash(c: *Compress, i: usize, h: Hash, gt: u16, max_chain: u16, good_: u16) struct {
 621    dist: u16,
 622    len: u16,
 623} {
 624    const l = &c.lookup;
 625    const buffered = c.writer.buffered();
 626
 627    var chain_limit = max_chain;
 628    var best_dist: u16 = undefined;
 629    var best_len = gt;
 630    const nice = @min(c.opts.nice, buffered[i..].len);
 631    var good = good_;
 632
 633    search: {
 634        if (l.head[h].is_null) break :search;
 635        // Actually a u15, but LLVM 21 does not optimize that as well (it truncates it each use).
 636        var dist: u16 = l.chain_pos -% l.head[h].value;
 637        while (true) {
 638            chain_limit -= 1;
 639
 640            const match_len = betterMatchLen(best_len, buffered[i - 1 - dist ..], buffered[i..]);
 641            if (match_len > best_len) {
 642                best_dist = dist;
 643                best_len = match_len;
 644                if (best_len >= nice) break;
 645                if (best_len >= good) {
 646                    chain_limit >>= 2;
 647                    good = math.maxInt(u8); // Reduce only once
 648                }
 649            }
 650
 651            if (chain_limit == 0) break;
 652            const next_chain_index = l.chain_pos -% @as(u15, @intCast(dist));
  653            // Equivalent to the below, however LLVM 21 optimizes the below worse.
 654            // if (l.chain[next_chain_index].is_null) break;
 655            // dist, const out_of_window = @addWithOverflow(dist, l.chain[next_chain_index].value);
 656            // if (out_of_window == 1) break;
 657            dist +%= l.chain[next_chain_index].int(); // wrapping for potential null bit
 658            comptime assert(flate.history_len == PackedOptionalU15.int(.null_bit));
 659            // Also, doing >= flate.history_len gives worse codegen with LLVM 21.
 660            if ((dist | l.chain[next_chain_index].int()) & flate.history_len != 0) break;
 661        }
 662    }
 663
 664    c.addHash(i, h);
 665    return .{ .dist = best_dist, .len = best_len };
 666}
 667
 668fn clenHlen(freqs: [19]u16) u4 {
 669    // Note that the first four codes (16, 17, 18, and 0) are always present.
 670    if (builtin.mode != .ReleaseSmall and (std.simd.suggestVectorLength(u16) orelse 1) >= 8) {
 671        const V = @Vector(16, u16);
 672        const hlen_mul: V = comptime m: {
 673            var hlen_mul: [16]u16 = undefined;
 674            for (token.codegen_order[3..], 0..) |i, hlen| {
 675                hlen_mul[i] = hlen;
 676            }
 677            break :m hlen_mul;
 678        };
 679        const encoded = freqs[0..16].* != @as(V, @splat(0));
 680        return @intCast(@reduce(.Max, @intFromBool(encoded) * hlen_mul));
 681    } else {
 682        var max: u4 = 0;
 683        for (token.codegen_order[4..], 1..) |i, len| {
 684            max = if (freqs[i] == 0) max else @intCast(len);
 685        }
 686        return max;
 687    }
 688}
 689
 690test clenHlen {
 691    var freqs: [19]u16 = @splat(0);
 692    try std.testing.expectEqual(0, clenHlen(freqs));
 693    for (token.codegen_order, 1..) |i, len| {
 694        freqs[i] = 1;
 695        try std.testing.expectEqual(len -| 4, clenHlen(freqs));
 696        freqs[i] = 0;
 697    }
 698}
 699
 700/// Returns the number of values followed by the bitsize of the extra bits.
 701fn buildClen(
 702    dyn_bits: []const u4,
 703    out_values: []u8,
 704    out_extra: []u8,
 705    out_freqs: *[19]u16,
 706) struct { u16, u16 } {
 707    assert(dyn_bits.len <= out_values.len);
 708    assert(out_values.len == out_extra.len);
 709
 710    var len: u16 = 0;
 711    var extra_bitsize: u16 = 0;
 712
 713    var remaining_bits = dyn_bits;
 714    var prev: u4 = 0;
 715    while (true) {
 716        const b = remaining_bits[0];
 717        const n_max = @min(@as(u8, if (b != 0)
 718            if (b != prev) 1 else 6
 719        else
 720            138), remaining_bits.len);
 721        prev = b;
 722
 723        var n: u8 = 0;
 724        while (true) {
 725            remaining_bits = remaining_bits[1..];
 726            n += 1;
 727            if (n == n_max or remaining_bits[0] != b) break;
 728        }
 729        const code, const extra, const xsize = switch (n) {
 730            0 => unreachable,
 731            1...2 => .{ b, 0, 0 },
 732            3...10 => .{
 733                @as(u8, 16) + @intFromBool(b == 0),
 734                n - 3,
 735                @as(u8, 2) + @intFromBool(b == 0),
 736            },
 737            11...138 => .{ 18, n - 11, 7 },
 738            else => unreachable,
 739        };
 740        while (true) {
 741            out_values[len] = code;
 742            out_extra[len] = extra;
 743            out_freqs[code] += 1;
 744            extra_bitsize += xsize;
 745            len += 1;
 746            if (n != 2) {
 747                @branchHint(.likely);
 748                break;
 749            }
  750            // Code needs to be output once more
 751            n = 1;
 752        }
 753        if (remaining_bits.len == 0) break;
 754    }
 755
 756    return .{ len, extra_bitsize };
 757}
 758
 759test buildClen {
 765    var out_values: [288 + 30]u8 = undefined;
 766    var out_extra: [288 + 30]u8 = undefined;
 767    var out_freqs: [19]u16 = @splat(0);
 768    const len, const extra_bitsize = buildClen(&([_]u4{
 769        1, // A
 770        2, 2, // B
 771        3, 3, 3, // C
 772        4, 4, 4, 4, // D
 773        5, // E
 774        5, 5, 5, 5, 5, 5, //
 775        5, 5, 5, 5, 5, 5,
 776        5, 5,
 777        0, 1, // F
 778        0, 0, 1, // G
 779    } ++ @as([138 + 10]u4, @splat(0)) // H
 780    ), &out_values, &out_extra, &out_freqs);
 781    try std.testing.expectEqualSlices(u8, &.{
 782        1, // A
 783        2, 2, // B
 784        3, 3, 3, // C
 785        4, 16, // D
 786        5, 16, 16, 5, 5, // E
 787        0, 1, // F
 788        0, 0, 1, // G
 789        18, 17, // H
 790    }, out_values[0..len]);
 791    try std.testing.expectEqualSlices(u8, &.{
 792        0, // A
 793        0, 0, // B
 794        0, 0, 0, // C
 795        0, (0), // D
 796        0, (3), (3), 0, 0, // E
 797        0, 0, // F
 798        0, 0, 0, // G
 799        (127), (7), // H
 800    }, out_extra[0..len]);
 801    try std.testing.expectEqual(2 + 2 + 2 + 7 + 3, extra_bitsize);
 802    try std.testing.expectEqualSlices(u16, &.{
 803        3, 3, 2, 3, 1, 3, 0, 0,
 804        0, 0, 0, 0, 0, 0, 0, 0,
 805        3, 1, 1,
 806    }, &out_freqs);
 807}
 808
 809fn writeBlock(c: *Compress, eos: bool) Writer.Error!void {
 810    const toks = &c.buffered_tokens;
 811    if (!eos) assert(toks.n == block_tokens);
 812    assert(toks.lit_freqs[256] == 0);
 813    toks.lit_freqs[256] = 1;
 814
 815    var dyn_codes_buf: [286 + 30]u16 = undefined;
 816    var dyn_bits_buf: [286 + 30]u4 = @splat(0);
 817
 818    const dyn_lit_codes_bitsize, const dyn_last_lit = huffman.build(
 819        &toks.lit_freqs,
 820        dyn_codes_buf[0..286],
 821        dyn_bits_buf[0..286],
 822        15,
 823        true,
 824    );
 825    const dyn_lit_len = @max(257, dyn_last_lit + 1);
 826
 827    const dyn_dist_codes_bitsize, const dyn_last_dist = huffman.build(
 828        &toks.dist_freqs,
 829        dyn_codes_buf[dyn_lit_len..][0..30],
 830        dyn_bits_buf[dyn_lit_len..][0..30],
 831        15,
 832        true,
 833    );
 834    const dyn_dist_len = @max(1, dyn_last_dist + 1);
 835
 836    var clen_values: [288 + 30]u8 = undefined;
 837    var clen_extra: [288 + 30]u8 = undefined;
 838    var clen_freqs: [19]u16 = @splat(0);
 839    const clen_len, const clen_extra_bitsize = buildClen(
 840        dyn_bits_buf[0 .. dyn_lit_len + dyn_dist_len],
 841        &clen_values,
 842        &clen_extra,
 843        &clen_freqs,
 844    );
 845
 846    var clen_codes: [19]u16 = undefined;
 847    var clen_bits: [19]u4 = @splat(0);
 848    const clen_codes_bitsize, _ = huffman.build(
 849        &clen_freqs,
 850        &clen_codes,
 851        &clen_bits,
 852        7,
 853        false,
 854    );
 855    const hclen = clenHlen(clen_freqs);
 856
 857    const dynamic_bitsize = @as(u32, 14) +
 858        (4 + @as(u6, hclen)) * 3 + clen_codes_bitsize + clen_extra_bitsize +
 859        dyn_lit_codes_bitsize + dyn_dist_codes_bitsize;
 860    const fixed_bitsize = n: {
 861        const freq7 = 1; // eos
 862        var freq8: u16 = 0;
 863        var freq9: u16 = 0;
 864        var freq12: u16 = 0; // 7 + 5 - match freqs always have corresponding 5-bit dist freq
 865        var freq13: u16 = 0; // 8 + 5
 866        for (toks.lit_freqs[0..144]) |f| freq8 += f;
 867        for (toks.lit_freqs[144..256]) |f| freq9 += f;
 868        assert(toks.lit_freqs[256] == 1);
 869        for (toks.lit_freqs[257..280]) |f| freq12 += f;
 870        for (toks.lit_freqs[280..286]) |f| freq13 += f;
 871        break :n @as(u32, freq7) * 7 +
 872            @as(u32, freq8) * 8 + @as(u32, freq9) * 9 +
 873            @as(u32, freq12) * 12 + @as(u32, freq13) * 13;
 874    };
 875
 876    stored: {
 877        for (toks.dist_freqs) |n| if (n != 0) break :stored;
 878        // No need to check len frequencies since they each have a corresponding dist frequency
 879        assert(for (toks.lit_freqs[257..]) |f| (if (f != 0) break false) else true);
 880
  881        // No matches. If the stored size is smaller than the huffman-encoded version, it will be
  882        // output in a stored block. This is not done with matches since the original input would
  883        // need to be kept (the window may have slid), and it may also exceed 65535 bytes. This
  884        // should be OK since most inputs with matches should be more compressible anyway.
 885        const stored_align_bits = -%(c.bit_writer.buffered_n +% 3);
 886        const stored_bitsize = stored_align_bits + @as(u32, 32) + @as(u32, toks.n) * 8;
 887        if (@min(dynamic_bitsize, fixed_bitsize) < stored_bitsize) break :stored;
 888
 889        try c.bit_writer.write(BlockHeader.int(.{ .kind = .stored, .final = eos }), 3);
 890        try c.bit_writer.output.rebase(0, 5);
 891        c.bit_writer.byteAlign();
 892        c.bit_writer.output.writeInt(u16, c.buffered_tokens.n, .little) catch unreachable;
 893        c.bit_writer.output.writeInt(u16, ~c.buffered_tokens.n, .little) catch unreachable;
 894
 895        // Relatively small buffer since regular draining will
 896        // always consume slightly less than 2 << 15 bytes.
 897        var vec_buf: [4][]const u8 = undefined;
 898        var vec_n: usize = 0;
 899        var i: usize = 0;
 900
 901        assert(c.buffered_tokens.pos != 0);
 902        while (i != c.buffered_tokens.pos) {
 903            const h: TokenBufferEntryHeader = @bitCast(toks.list[i..][0..2].*);
 904            assert(h.kind == .bytes);
 905
 906            i += 2;
 907            vec_buf[vec_n] = toks.list[i..][0..h.data];
 908            i += h.data;
 909
 910            vec_n += 1;
 911            if (i == c.buffered_tokens.pos or vec_n == vec_buf.len) {
 912                try c.bit_writer.output.writeVecAll(vec_buf[0..vec_n]);
 913                vec_n = 0;
 914            }
 915        }
 916
 917        toks.* = .empty;
 918        return;
 919    }
 920
 921    const lit_codes, const lit_bits, const dist_codes, const dist_bits =
 922        if (dynamic_bitsize < fixed_bitsize) codes: {
 923            try c.bit_writer.write(BlockHeader.Dynamic.int(.{
 924                .regular = .{ .final = eos, .kind = .dynamic },
 925                .hlit = @intCast(dyn_lit_len - 257),
 926                .hdist = @intCast(dyn_dist_len - 1),
 927                .hclen = hclen,
 928            }), 17);
 929            try c.bit_writer.writeClen(
 930                hclen,
 931                clen_values[0..clen_len],
 932                clen_extra[0..clen_len],
 933                clen_codes,
 934                clen_bits,
 935            );
 936            break :codes .{
 937                dyn_codes_buf[0..dyn_lit_len],
 938                dyn_bits_buf[0..dyn_lit_len],
 939                dyn_codes_buf[dyn_lit_len..][0..dyn_dist_len],
 940                dyn_bits_buf[dyn_lit_len..][0..dyn_dist_len],
 941            };
 942        } else codes: {
 943            try c.bit_writer.write(BlockHeader.int(.{ .final = eos, .kind = .fixed }), 3);
 944            break :codes .{
 945                &token.fixed_lit_codes,
 946                &token.fixed_lit_bits,
 947                &token.fixed_dist_codes,
 948                &token.fixed_dist_bits,
 949            };
 950        };
 951
 952    var i: usize = 0;
 953    while (i != toks.pos) {
 954        const h: TokenBufferEntryHeader = @bitCast(toks.list[i..][0..2].*);
 955        i += 2;
 956        if (h.kind == .bytes) {
 957            for (toks.list[i..][0..h.data]) |b| {
 958                try c.bit_writer.write(lit_codes[b], lit_bits[b]);
 959            }
 960            i += h.data;
 961        } else {
 962            const dist = h.data;
 963            const len = toks.list[i];
 964            i += 1;
 965            const dist_code = token.DistCode.fromVal(dist);
 966            const len_code = token.LenCode.fromVal(len);
 967            const dist_val = dist_code.toInt();
 968            const lit_val = @as(u16, 257) + len_code.toInt();
 969
 970            var out: u48 = lit_codes[lit_val];
 971            var out_bits: u6 = lit_bits[lit_val];
 972            out |= @shlExact(@as(u20, len - len_code.base()), @intCast(out_bits));
 973            out_bits += len_code.extraBits();
 974
 975            out |= @shlExact(@as(u35, dist_codes[dist_val]), out_bits);
 976            out_bits += dist_bits[dist_val];
 977            out |= @shlExact(@as(u48, dist - dist_code.base()), out_bits);
 978            out_bits += dist_code.extraBits();
 979
 980            try c.bit_writer.write(out, out_bits);
 981        }
 982    }
 983    try c.bit_writer.write(lit_codes[256], lit_bits[256]);
 984
 985    toks.* = .empty;
 986}
 987
 988/// Huffman tree construction.
 989///
 990/// The approach for building the huffman tree is [taken from zlib]
 991/// (https://github.com/madler/zlib/blob/v1.3.1/trees.c#L625) with some modifications.
 992const huffman = struct {
 993    const max_leafs = 286;
 994    const max_nodes = max_leafs * 2;
 995
 996    const Node = packed struct(u32) {
 997        depth: u16,
 998        freq: u16,
 999
1000        pub const Index = u16;
1001
1002        /// `freq` is more significant than `depth`
1003        pub fn smaller(a: Node, b: Node) bool {
1004            return @as(u32, @bitCast(a)) < @as(u32, @bitCast(b));
1005        }
1006    };
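        // e.g. (illustrative) a node with freq 2 and depth 9 is `smaller` than one with freq 3
        // and depth 0, since `freq` occupies the upper bits of the packed integer.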
1007
1008    fn heapSiftDown(nodes: []Node, heap: []Node.Index, start: usize) void {
1009        var i = start;
1010        while (true) {
1011            var min = i;
1012            const l = i * 2 + 1;
1013            const r = l + 1;
1014            min = if (l < heap.len and nodes[heap[l]].smaller(nodes[heap[min]])) l else min;
1015            min = if (r < heap.len and nodes[heap[r]].smaller(nodes[heap[min]])) r else min;
1016            if (i == min) break;
1017            mem.swap(Node.Index, &heap[i], &heap[min]);
1018            i = min;
1019        }
1020    }
1021
1022    fn heapRemoveRoot(nodes: []Node, heap: []Node.Index) void {
1023        heap[0] = heap[heap.len - 1];
1024        heapSiftDown(nodes, heap[0 .. heap.len - 1], 0);
1025    }
1026
1027    /// Returns the total bits to encode `freqs` followed by the index of the last non-zero bit length.
1028    /// For `freqs[i]` == 0, `out_codes[i]` will be undefined.
1029    /// It is asserted `out_bits` is zero-filled.
1030    /// It is asserted `out_bits.len` is at least
1031    /// one if incomplete trees are allowed and two otherwise.
1032    pub fn build(
1033        freqs: []const u16,
1034        out_codes: []u16,
1035        out_bits: []u4,
1036        max_bits: u4,
1037        incomplete_allowed: bool,
1038    ) struct { u32, u16 } {
1039        assert(out_codes.len - 1 >= @intFromBool(incomplete_allowed));
1040        // freqs and out_codes are in the loop to assert they are all the same length
1041        for (freqs, out_codes, out_bits) |_, _, n| assert(n == 0);
1042        assert(out_codes.len <= @as(u16, 1) << max_bits);
1043
1044        // Indexes 0..freqs.len are leafs, indexes max_leafs.. are internal nodes.
1045        var tree_nodes: [max_nodes]Node = undefined;
1046        var tree_parent_nodes: [max_nodes]Node.Index = undefined;
1047        var nodes_end: u16 = max_leafs;
1048        // Dual-purpose buffer. Nodes are ordered by least frequency or when equal, least depth.
1049        // The start is a min heap of level-zero nodes.
1050        // The end is a sorted buffer of nodes with the greatest first.
1051        var node_buf: [max_nodes]Node.Index = undefined;
1052        var heap_end: u16 = 0;
1053        var sorted_start: u16 = node_buf.len;
1054
1055        for (0.., freqs) |n, freq| {
1056            tree_nodes[n] = .{ .freq = freq, .depth = 0 };
1057            node_buf[heap_end] = @intCast(n);
1058            heap_end += @intFromBool(freq != 0);
1059        }
1060
1061        // There must be at least one code,
1062        node_buf[heap_end] = 0;
1063        heap_end += @intFromBool(heap_end == 0);
1064        // and at least two if an incomplete tree must be avoided.
1065        if (heap_end == 1 and incomplete_allowed) {
1066            @branchHint(.unlikely); // LLVM 21 optimizes this branch as the more likely without
1067
1068            // Codes must have at least one-bit, so this is a special case.
1069            out_bits[node_buf[0]] = 1;
1070            out_codes[node_buf[0]] = 0;
1071            return .{ freqs[node_buf[0]], node_buf[0] };
1072        }
1073        const last_nonzero = @max(node_buf[heap_end - 1], 1); // For heap_end > 1, the last entry is not 0
1074        node_buf[heap_end] = @intFromBool(node_buf[0] == 0);
1075        heap_end += @intFromBool(heap_end == 1);
1076
1077        // Heapify the array of frequencies
1078        const heapify_final = heap_end - 1;
1079        const heapify_start = (heapify_final - 1) / 2; // Parent of final node
1080        var heapify_i = heapify_start;
1081        while (true) {
1082            heapSiftDown(&tree_nodes, node_buf[0..heap_end], heapify_i);
1083            if (heapify_i == 0) break;
1084            heapify_i -= 1;
1085        }
1086
1087        // Build optimal tree. `max_bits` is not enforced yet.
1088        while (heap_end > 1) {
1089            const a = node_buf[0];
1090            heapRemoveRoot(&tree_nodes, node_buf[0..heap_end]);
1091            heap_end -= 1;
1092            const b = node_buf[0];
1093
1094            sorted_start -= 2;
1095            node_buf[sorted_start..][0..2].* = .{ b, a };
1096
1097            tree_nodes[nodes_end] = .{
1098                .freq = tree_nodes[a].freq + tree_nodes[b].freq,
1099                .depth = @max(tree_nodes[a].depth, tree_nodes[b].depth) + 1,
1100            };
1101            defer nodes_end += 1;
1102            tree_parent_nodes[a] = nodes_end;
1103            tree_parent_nodes[b] = nodes_end;
1104
1105            node_buf[0] = nodes_end;
1106            heapSiftDown(&tree_nodes, node_buf[0..heap_end], 0);
1107        }
1108        sorted_start -= 1;
1109        node_buf[sorted_start] = node_buf[0];
1110
1111        var bit_counts: [16]u16 = @splat(0);
1112        buildBits(out_bits, &bit_counts, &tree_parent_nodes, node_buf[sorted_start..], max_bits);
1113        return .{ buildValues(freqs, out_codes, out_bits, bit_counts), last_nonzero };
1114    }
1115
1116    fn buildBits(
1117        out_bits: []u4,
1118        bit_counts: *[16]u16,
1119        parent_nodes: *[max_nodes]Node.Index,
1120        sorted: []Node.Index,
1121        max_bits: u4,
1122    ) void {
1123        var internal_node_bits: [max_nodes - max_leafs]u4 = undefined;
1124        var overflowed: u16 = 0;
1125
1126        internal_node_bits[sorted[0] - max_leafs] = 0; // root
1127        for (sorted[1..]) |i| {
1128            const parent_bits = internal_node_bits[parent_nodes[i] - max_leafs];
1129            overflowed += @intFromBool(parent_bits == max_bits);
1130            const bits = parent_bits + @intFromBool(parent_bits != max_bits);
1131            bit_counts[bits] += @intFromBool(i < max_leafs);
1132            (if (i >= max_leafs) &internal_node_bits[i - max_leafs] else &out_bits[i]).* = bits;
1133        }
1134
1135        if (overflowed == 0) {
1136            @branchHint(.likely);
1137            return;
1138        }
1139
1140        outer: while (true) {
1141            var deepest: u4 = max_bits - 1;
1142            while (bit_counts[deepest] == 0) deepest -= 1;
1143            while (overflowed != 0) {
1144                // Insert an internal node under the leaf and move an overflow as its sibling
1145                bit_counts[deepest] -= 1;
1146                bit_counts[deepest + 1] += 2;
1147                // Only the overflow is moved. Its sibling's depth is one less, however it is still >= depth.
1148                bit_counts[max_bits] -= 1;
1149                overflowed -= 2;
1150
1151                if (overflowed == 0) break :outer;
1152                deepest += 1;
1153                if (deepest == max_bits) continue :outer;
1154            }
1155        }
1156
1157        // Reassign bit lengths
1158        assert(bit_counts[0] == 0);
1159        var i: usize = 0;
1160        for (1.., bit_counts[1..]) |bits, all| {
1161            var remaining = all;
1162            while (remaining != 0) {
1163                defer i += 1;
1164                if (sorted[i] >= max_leafs) continue;
1165                out_bits[sorted[i]] = @intCast(bits);
1166                remaining -= 1;
1167            }
1168        }
1169        assert(for (sorted[i..]) |n| { // all leafs consumed
1170            if (n < max_leafs) break false;
1171        } else true);
1172    }
1173
1174    fn buildValues(freqs: []const u16, out_codes: []u16, bits: []u4, bit_counts: [16]u16) u32 {
1175        var code: u16 = 0;
1176        var base: [16]u16 = undefined;
1177        assert(bit_counts[0] == 0);
1178        for (bit_counts[1..], base[1..]) |c, *b| {
1179            b.* = code;
1180            code +%= c;
1181            code <<= 1;
1182        }
1183        var freq_sums: [16]u16 = @splat(0);
1184        for (out_codes, bits, freqs) |*c, b, f| {
1185            c.* = @bitReverse(base[b]) >> -%b;
1186            base[b] += 1; // For `b == 0` this is fine since the code is specified to be undefined.
1187            freq_sums[b] += f;
1188        }
1189        return @reduce(.Add, @as(@Vector(16, u32), freq_sums) * std.simd.iota(u32, 16));
1190    }
1191
1192    test build {
1193        var codes: [8]u16 = undefined;
1194        var bits: [8]u4 = undefined;
1195
1196        const regular_freqs: [8]u16 = .{ 1, 1, 0, 8, 8, 0, 2, 4 };
1197        // The optimal tree for the above frequencies is
1198        // 4             1   1
1199        //                \ /
1200        // 3           2   #
1201        //              \ /
1202        // 2   8   8 4   #
1203        //      \ /   \ /
1204        // 1     #     #
1205        //        \   /
1206        // 0        #
1207        bits = @splat(0);
1208        var n, var lnz = build(&regular_freqs, &codes, &bits, 15, true);
1209        codes[2] = 0;
1210        codes[5] = 0;
1211        try std.testing.expectEqualSlices(u4, &.{ 4, 4, 0, 2, 2, 0, 3, 2 }, &bits);
1212        try std.testing.expectEqualSlices(u16, &.{
1213            0b0111, 0b1111, 0, 0b00, 0b10, 0, 0b011, 0b01,
1214        }, &codes);
1215        try std.testing.expectEqual(54, n);
1216        try std.testing.expectEqual(7, lnz);
1217        // When constrained to 3 bits, it becomes
1218        // 3        1   1 2   4
1219        //           \ /   \ /
1220        // 2   8   8  #     #
1221        //      \ /    \   /
1222        // 1     #       #
1223        //        \     /
1224        // 0         #
1225        bits = @splat(0);
1226        n, lnz = build(&regular_freqs, &codes, &bits, 3, true);
1227        codes[2] = 0;
1228        codes[5] = 0;
1229        try std.testing.expectEqualSlices(u4, &.{ 3, 3, 0, 2, 2, 0, 3, 3 }, &bits);
1230        try std.testing.expectEqualSlices(u16, &.{
1231            0b001, 0b101, 0, 0b00, 0b10, 0, 0b011, 0b111,
1232        }, &codes);
1233        try std.testing.expectEqual(56, n);
1234        try std.testing.expectEqual(7, lnz);
1235
1236        // Empty tree. At least one code should be present
1237        bits = @splat(0);
1238        n, lnz = build(&.{ 0, 0 }, codes[0..2], bits[0..2], 15, true);
1239        try std.testing.expectEqualSlices(u4, &.{ 1, 0 }, bits[0..2]);
1240        try std.testing.expectEqual(0b0, codes[0]);
1241        try std.testing.expectEqual(0, n);
1242        try std.testing.expectEqual(0, lnz);
1243
1244        // Check that all frequency sets which would yield an incomplete tree are completed
1245        for ([_][2]u16{ .{ 0, 0 }, .{ 0, 1 }, .{ 1, 0 } }) |incomplete| {
1246            // Empty tree. Both codes should be present to prevent incomplete trees
1247            bits = @splat(0);
1248            n, lnz = build(&incomplete, codes[0..2], bits[0..2], 15, false);
1249            try std.testing.expectEqualSlices(u4, &.{ 1, 1 }, bits[0..2]);
1250            try std.testing.expectEqualSlices(u16, &.{ 0b0, 0b1 }, codes[0..2]);
1251            try std.testing.expectEqual(incomplete[0] + incomplete[1], n);
1252            try std.testing.expectEqual(1, lnz);
1253        }
1254
1255        try std.testing.fuzz({}, checkFuzzedBuildFreqs, .{});
1256    }
1257
1258    fn checkFuzzedBuildFreqs(_: void, freqs: []const u8) !void {
1259        @disableInstrumentation();
1260        var r: Io.Reader = .fixed(freqs);
1261        var freqs_limit: u16 = 65535;
1262        var freqs_buf: [max_leafs]u16 = undefined;
1263        var nfreqs: u15 = 0;
1264
1265        const params: packed struct(u8) {
1266            max_bits: u4,
1267            _: u3,
1268            incomplete_allowed: bool,
1269        } = @bitCast(r.takeByte() catch 255);
1270        while (nfreqs != freqs_buf.len) {
1271            const leb = r.takeLeb128(u16);
1272            const f = if (leb) |f| @min(f, freqs_limit) else |e| switch (e) {
1273                error.ReadFailed => unreachable,
1274                error.EndOfStream => 0,
1275                error.Overflow => freqs_limit,
1276            };
1277            freqs_buf[nfreqs] = f;
1278            nfreqs += 1;
1279            freqs_limit -= f;
1280            if (leb == error.EndOfStream and nfreqs - 1 > @intFromBool(params.incomplete_allowed))
1281                break;
1282        }
1283
1284        var codes_buf: [max_leafs]u16 = undefined;
1285        var bits_buf: [max_leafs]u4 = @splat(0);
1286        const total_bits, const last_nonzero = build(
1287            freqs_buf[0..nfreqs],
1288            codes_buf[0..nfreqs],
1289            bits_buf[0..nfreqs],
1290            @max(math.log2_int_ceil(u15, nfreqs), params.max_bits),
1291            params.incomplete_allowed,
1292        );
1293
1294        var has_bitlen_one: bool = false;
1295        var expected_total_bits: u32 = 0;
1296        var expected_last_nonzero: ?u16 = null;
1297        var weighted_sum: u32 = 0;
1298        for (freqs_buf[0..nfreqs], bits_buf[0..nfreqs], 0..) |f, nb, i| {
1299            has_bitlen_one = has_bitlen_one or nb == 1;
1300            weighted_sum += @shlExact(@as(u16, 1), 15 - nb) & ((1 << 15) - 1);
1301            expected_total_bits += @as(u32, f) * nb;
1302            if (nb != 0) expected_last_nonzero = @intCast(i);
1303        }
1304
1305        errdefer std.log.err(
1306            \\ params: {}
1307            \\ freqs: {any}
1308            \\ bits: {any}
1309            \\ # freqs: {}
1310            \\ max bits: {} 
1311            \\ weighted sum: {}
1312            \\ has_bitlen_one: {}
1313            \\ expected/actual total bits: {}/{}
1314            \\ expected/actual last nonzero: {?}/{}
1315        ++ "\n", .{
1316            params,
1317            freqs_buf[0..nfreqs],
1318            bits_buf[0..nfreqs],
1319            nfreqs,
1320            @max(math.log2_int_ceil(u15, nfreqs), params.max_bits),
1321            weighted_sum,
1322            has_bitlen_one,
1323            expected_total_bits,
1324            total_bits,
1325            expected_last_nonzero,
1326            last_nonzero,
1327        });
1328
1329        try std.testing.expectEqual(expected_total_bits, total_bits);
1330        try std.testing.expectEqual(expected_last_nonzero, last_nonzero);
1331        if (weighted_sum > 1 << 15)
1332            return error.OversubscribedHuffmanTree;
1333        if (weighted_sum < 1 << 15 and
1334            !(params.incomplete_allowed and has_bitlen_one and weighted_sum == 1 << 14))
1335            return error.IncompleteHuffmanTree;
1336    }
1337};
1338
1339test {
1340    _ = huffman;
1341}
1342
1343/// [0] is a gradient where the probability of lower values decreases across it
1344/// [1] is completely random and hence incompressible
1345fn testingFreqBufs() !*[2][65536]u8 {
1346    const fbufs = try std.testing.allocator.create([2][65536]u8);
1347    var prng: std.Random.DefaultPrng = .init(std.testing.random_seed);
1348    prng.random().bytes(&fbufs[0]);
1349    prng.random().bytes(&fbufs[1]);
1350    for (0.., &fbufs[0], fbufs[1]) |i, *grad, rand| {
1351        const prob = @as(u8, @intCast(255 - i / (fbufs[0].len / 256)));
1352        grad.* /= @max(1, rand / @max(1, prob));
1353    }
1354    return fbufs;
1355}
1356
1357fn testingCheckDecompressedMatches(
1358    flate_bytes: []const u8,
1359    expected_size: u32,
1360    expected_hash: flate.Container.Hasher,
1361) !void {
1362    const container: flate.Container = expected_hash;
1363    var data_hash: flate.Container.Hasher = .init(container);
1364    var data_size: u32 = 0;
1365    var flate_r: Io.Reader = .fixed(flate_bytes);
1366    var deflate_buf: [flate.max_window_len]u8 = undefined;
1367    var deflate: flate.Decompress = .init(&flate_r, container, &deflate_buf);
1368
1369    while (deflate.reader.peekGreedy(1)) |bytes| {
1370        data_size += @intCast(bytes.len);
1371        data_hash.update(bytes);
1372        deflate.reader.toss(bytes.len);
1373    } else |e| switch (e) {
1374        error.ReadFailed => return deflate.err.?,
1375        error.EndOfStream => {},
1376    }
1377
1378    try testingCheckContainerHash(
1379        expected_size,
1380        expected_hash,
1381        data_hash,
1382        data_size,
1383        deflate.container_metadata,
1384    );
1385}
1386
1387fn testingCheckContainerHash(
1388    expected_size: u32,
1389    expected_hash: flate.Container.Hasher,
1390    actual_hash: flate.Container.Hasher,
1391    actual_size: u32,
1392    actual_meta: flate.Container.Metadata,
1393) !void {
1394    try std.testing.expectEqual(expected_size, actual_size);
1395    switch (actual_hash) {
1396        .raw => {},
1397        .gzip => |gz| {
1398            const expected_crc = expected_hash.gzip.crc.final();
1399            try std.testing.expectEqual(expected_size, actual_meta.gzip.count);
1400            try std.testing.expectEqual(expected_crc, gz.crc.final());
1401            try std.testing.expectEqual(expected_crc, actual_meta.gzip.crc);
1402        },
1403        .zlib => |zl| {
1404            const expected_adler = expected_hash.zlib.adler;
1405            try std.testing.expectEqual(expected_adler, zl.adler);
1406            try std.testing.expectEqual(expected_adler, actual_meta.zlib.adler);
1407        },
1408    }
1409}
1410
1411const PackedContainer = packed struct(u2) {
1412    raw: bool,
1413    other: enum(u1) { gzip, zlib },
1414
1415    pub fn val(c: @This()) flate.Container {
1416        return if (c.raw) .raw else switch (c.other) {
1417            .gzip => .gzip,
1418            .zlib => .zlib,
1419        };
1420    }
1421};
1422
1423test Compress {
1424    const fbufs = try testingFreqBufs();
1425    defer if (!builtin.fuzz) std.testing.allocator.destroy(fbufs);
1426    try std.testing.fuzz(fbufs, testFuzzedCompressInput, .{});
1427}
1428
1429fn testFuzzedCompressInput(fbufs: *const [2][65536]u8, input: []const u8) !void {
1430    var in: Io.Reader = .fixed(input);
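    // The first LEB128 value of the fuzz input selects the container and compressor
    // options; short or malformed input falls back to all-zero options.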
1431    var opts: packed struct(u51) {
1432        container: PackedContainer,
1433        buf_size: u16,
1434        good: u8,
1435        nice: u8,
1436        lazy: u8,
        /// Not a `u16`, to limit the chain length for performance
1438        chain: u9,
1439    } = @bitCast(in.takeLeb128(u51) catch 0);
1440    var expected_hash: flate.Container.Hasher = .init(opts.container.val());
1441    var expected_size: u32 = 0;
1442
1443    var flate_buf: [128 * 1024]u8 = undefined;
1444    var flate_w: Writer = .fixed(&flate_buf);
1445    var deflate_buf: [flate.max_window_len * 2]u8 = undefined;
1446    var deflate_w = try Compress.init(
1447        &flate_w,
1448        deflate_buf[0 .. flate.max_window_len + @as(usize, opts.buf_size)],
1449        opts.container.val(),
1450        .{
1451            .good = @as(u16, opts.good) + 3,
1452            .nice = @as(u16, opts.nice) + 3,
1453            .lazy = @as(u16, @min(opts.lazy, opts.nice)) + 3,
1454            .chain = @max(1, opts.chain, @as(u8, 4) * @intFromBool(opts.good <= opts.lazy)),
1455        },
1456    );
1457
    // It is ensured that no more bytes than this are written, both so this run
    // does not take too long and so that `flate_buf` does not run out of space.
1460    const flate_buf_blocks = flate_buf.len / block_tokens;
    // Allow a max overhead of 64 bytes per block since the implementation does not guarantee it
    // writes store blocks when optimal. This comes from taking less than 32 bytes to write an
    // optimal dynamic block header of mostly bitlen 8 codes and the end of block literal, plus
    // `(65536 / 256) / 8`, which is the maximum number of extra bytes from bitlen 9 codes. An
    // extra 32 bytes is reserved on top of that for container headers and footers.
1466    const max_size = flate_buf.len - (flate_buf_blocks * 64 + 32);
1467
1468    while (true) {
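        // Each subsequent LEB128 value encodes one action: a writer rebase, a copy
        // repeating previously written bytes, or a slice of the gradient/random buffers.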
1469        const data: packed struct(u36) {
1470            is_rebase: bool,
1471            is_bytes: bool,
1472            params: packed union {
1473                copy: packed struct(u34) {
1474                    len_lo: u5,
1475                    dist: u15,
1476                    len_hi: u4,
1477                    _: u10,
1478                },
1479                bytes: packed struct(u34) {
1480                    kind: enum(u1) { gradient, random },
1481                    off_hi: u4,
1482                    len_lo: u10,
1483                    off_mi: u4,
1484                    len_hi: u5,
1485                    off_lo: u8,
1486                    _: u2,
1487                },
1488                rebase: packed struct(u34) {
1489                    preserve: u17,
1490                    capacity: u17,
1491                },
1492            },
1493        } = @bitCast(in.takeLeb128(u36) catch |e| switch (e) {
1494            error.ReadFailed => unreachable,
1495            error.Overflow => 0,
1496            error.EndOfStream => break,
1497        });
1498
1499        const buffered = deflate_w.writer.buffered();
        // Required both for repeating patterns and because writing from `buffered` is illegal
1501        var copy_buf: [512]u8 = undefined;
1502
1503        if (data.is_rebase) {
1504            const usable_capacity = deflate_w.writer.buffer.len - rebase_reserved_capacity;
1505            const preserve = @min(data.params.rebase.preserve, usable_capacity);
1506            const capacity = @min(data.params.rebase.capacity, usable_capacity -
1507                @max(rebase_min_preserve, preserve));
1508            try deflate_w.writer.rebase(preserve, capacity);
1509            continue;
1510        }
1511
1512        const max_bytes = max_size -| expected_size;
1513        const bytes = if (!data.is_bytes and buffered.len != 0) bytes: {
1514            const dist = @min(buffered.len, @as(u32, data.params.copy.dist) + 1);
1515            const len = @min(
1516                @max(@shlExact(@as(u9, data.params.copy.len_hi), 5) | data.params.copy.len_lo, 1),
1517                max_bytes,
1518            );
            // Reuse the implementation's history; otherwise we would need to maintain our own.
1520            const bytes_start = buffered[buffered.len - dist ..];
1521            const history_bytes = bytes_start[0..@min(bytes_start.len, len)];
1522
1523            @memcpy(copy_buf[0..history_bytes.len], history_bytes);
1524            const new_history = len - history_bytes.len;
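            // When `len` exceeds the available history, the rest of the copy overlaps
            // bytes just written to `copy_buf`, mirroring LZ77 back-reference semantics
            // where a distance shorter than the length repeats the pattern.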
1525            if (history_bytes.len != len) for ( // check needed for `- dist`
1526                copy_buf[history_bytes.len..][0..new_history],
1527                copy_buf[history_bytes.len - dist ..][0..new_history],
1528            ) |*next, prev| {
1529                next.* = prev;
1530            };
1531            break :bytes copy_buf[0..len];
1532        } else bytes: {
1533            const off = @shlExact(@as(u16, data.params.bytes.off_hi), 12) |
1534                @shlExact(@as(u16, data.params.bytes.off_mi), 8) |
1535                data.params.bytes.off_lo;
1536            const len = @shlExact(@as(u16, data.params.bytes.len_hi), 10) |
1537                data.params.bytes.len_lo;
1538            const fbuf = &fbufs[@intFromEnum(data.params.bytes.kind)];
1539            break :bytes fbuf[off..][0..@min(len, fbuf.len - off, max_bytes)];
1540        };
1541        assert(bytes.len <= max_bytes);
1542        try deflate_w.writer.writeAll(bytes);
1543        expected_hash.update(bytes);
1544        expected_size += @intCast(bytes.len);
1545    }
1546
1547    try deflate_w.writer.flush();
1548    try testingCheckDecompressedMatches(flate_w.buffered(), expected_size, expected_hash);
1549}
1550
/// Does not compress data; only emits store blocks.
1552pub const Raw = struct {
    /// After `flush` is called, all vtable calls will result in `error.WriteFailed`.
1554    writer: Writer,
1555    output: *Writer,
1556    hasher: flate.Container.Hasher,
1557
1558    const max_block_size: u16 = 65535;
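    /// The byte-aligned 5-byte header of a non-final store block of maximal size:
    /// the three header bits (BFINAL=0, BTYPE=00) in the first byte, followed by
    /// LEN=65535 and NLEN=~LEN, both little-endian.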
1559    const full_header: [5]u8 = .{
1560        BlockHeader.int(.{ .final = false, .kind = .stored }),
1561        255,
1562        255,
1563        0,
1564        0,
1565    };
1566
1567    /// While there is no minimum buffer size, it is recommended
1568    /// to be at least `flate.max_window_len` for optimal output.
1569    pub fn init(output: *Writer, buffer: []u8, container: flate.Container) Writer.Error!Raw {
1570        try output.writeAll(container.header());
1571        return .{
1572            .writer = .{
1573                .buffer = buffer,
1574                .vtable = &.{
1575                    .drain = Raw.drain,
1576                    .flush = Raw.flush,
1577                    .rebase = Raw.rebase,
1578                },
1579            },
1580            .output = output,
1581            .hasher = .init(container),
1582        };
1583    }
1584
1585    fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
1586        errdefer w.* = .failing;
1587        const r: *Raw = @fieldParentPtr("writer", w);
1588        const min_block = @min(w.buffer.len, max_block_size);
1589        const pattern = data[data.len - 1];
1590        var partial_header: [5]u8 = undefined;
1591
1592        var vecs: [16][]const u8 = undefined;
1593        var vecs_n: usize = 0;
1594        const data_bytes = Writer.countSplat(data, splat);
1595        const total_bytes = w.end + data_bytes;
1596        var rem_bytes = total_bytes;
1597        var rem_splat = splat;
1598        var rem_data = data;
1599        var rem_data_elem: []const u8 = w.buffered();
1600
1601        assert(rem_bytes > min_block);
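        // Carve the buffered bytes plus `data` (with the last element repeated `splat`
        // times) into store blocks of at most `max_block_size` bytes, batching headers
        // and payload slices into `vecs` so they can be forwarded with vectored writes.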
1602        while (rem_bytes > min_block) { // not >= to allow `min_block` blocks to be marked as final
1603            // also, it handles the case of `min_block` being zero (no buffer)
1604            const block_size: u16 = @min(rem_bytes, max_block_size);
1605            rem_bytes -= block_size;
1606
1607            if (vecs_n == vecs.len) {
1608                try r.output.writeVecAll(&vecs);
1609                vecs_n = 0;
1610            }
1611            vecs[vecs_n] = if (block_size == 65535)
1612                &full_header
1613            else header: {
1614                partial_header[0] = BlockHeader.int(.{ .final = false, .kind = .stored });
1615                mem.writeInt(u16, partial_header[1..3], block_size, .little);
1616                mem.writeInt(u16, partial_header[3..5], ~block_size, .little);
1617                break :header &partial_header;
1618            };
1619            vecs_n += 1;
1620
1621            var block_limit: Io.Limit = .limited(block_size);
1622            while (true) {
1623                if (vecs_n == vecs.len) {
1624                    try r.output.writeVecAll(&vecs);
1625                    vecs_n = 0;
1626                }
1627
1628                const vec = block_limit.sliceConst(rem_data_elem);
1629                vecs[vecs_n] = vec;
1630                vecs_n += 1;
1631                r.hasher.update(vec);
1632
1633                const is_pattern = rem_splat != splat and vec.len == pattern.len;
1634                if (is_pattern) assert(pattern.len != 0); // exceeded countSplat
1635
1636                if (!is_pattern or rem_splat == 0 or pattern.len > @intFromEnum(block_limit) / 2) {
1637                    rem_data_elem = rem_data_elem[vec.len..];
1638                    block_limit = block_limit.subtract(vec.len).?;
1639
1640                    if (rem_data_elem.len == 0) {
1641                        rem_data_elem = rem_data[0];
1642                        if (rem_data.len != 1) {
1643                            rem_data = rem_data[1..];
1644                        } else if (rem_splat != 0) {
1645                            rem_splat -= 1;
1646                        } else {
1647                            // All of `data` has been consumed.
1648                            assert(block_limit == .nothing);
1649                            assert(rem_bytes == 0);
1650                            // Since `rem_bytes` and `block_limit` are zero, these won't be used.
1651                            rem_data = undefined;
1652                            rem_data_elem = undefined;
1653                            rem_splat = undefined;
1654                        }
1655                    }
1656                    if (block_limit == .nothing) break;
1657                } else {
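                    // The remaining block space is large relative to the pattern, so
                    // forward the repetition as a splat rather than expanding it into
                    // individual vector entries.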
1658                    const out_splat = @intFromEnum(block_limit) / pattern.len;
1659                    assert(out_splat >= 2);
1660
1661                    try r.output.writeSplatAll(vecs[0..vecs_n], out_splat);
1662                    for (1..out_splat) |_| r.hasher.update(vec);
1663
1664                    vecs_n = 0;
1665                    block_limit = block_limit.subtract(pattern.len * out_splat).?;
1666                    if (rem_splat >= out_splat) {
                        // `out_splat` accounts for `rem_data`, however one more needs to be
                        // subtracted anyway since the next pattern is also being taken.
1669                        rem_splat -= out_splat;
1670                    } else {
1671                        // All of `data` has been consumed.
1672                        assert(block_limit == .nothing);
1673                        assert(rem_bytes == 0);
1674                        // Since `rem_bytes` and `block_limit` are zero, these won't be used.
1675                        rem_data = undefined;
1676                        rem_data_elem = undefined;
1677                        rem_splat = undefined;
1678                    }
1679                    if (block_limit == .nothing) break;
1680                }
1681            }
1682        }
1683
1684        if (vecs_n != 0) { // can be the case if a splat was sent
1685            try r.output.writeVecAll(vecs[0..vecs_n]);
1686        }
1687
1688        if (rem_bytes > data_bytes) {
1689            assert(rem_bytes - data_bytes == rem_data_elem.len);
1690            assert(&rem_data_elem[0] == &w.buffer[total_bytes - rem_bytes]);
1691        }
1692        return w.consume(total_bytes - rem_bytes);
1693    }
1694
1695    fn flush(w: *Writer) Writer.Error!void {
1696        defer w.* = .failing;
1697        try Raw.rebaseInner(w, 0, w.buffer.len, true);
1698    }
1699
1700    fn rebase(w: *Writer, preserve: usize, capacity: usize) Writer.Error!void {
1701        errdefer w.* = .failing;
1702        try Raw.rebaseInner(w, preserve, capacity, false);
1703    }
1704
1705    fn rebaseInner(w: *Writer, preserve: usize, capacity: usize, eos: bool) Writer.Error!void {
1706        const r: *Raw = @fieldParentPtr("writer", w);
1707        assert(preserve + capacity <= w.buffer.len);
1708        if (eos) assert(capacity == w.buffer.len);
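        // Write the oldest buffered bytes out as store blocks until at least `capacity`
        // bytes of the buffer are free, keeping up to `preserve` of the newest bytes
        // buffered. With `eos`, everything is written, the last block is marked final,
        // and the container footer (if any) follows.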
1709
1710        var partial_header: [5]u8 = undefined;
1711        var footer_buf: [8]u8 = undefined;
1712        const preserved = @min(w.end, preserve);
1713        var remaining = w.buffer[0 .. w.end - preserved];
1714
1715        var vecs: [16][]const u8 = undefined;
1716        var vecs_n: usize = 0;
1717        while (remaining.len > max_block_size) { // not >= so there is always a block down below
1718            if (vecs_n == vecs.len) {
1719                try r.output.writeVecAll(&vecs);
1720                vecs_n = 0;
1721            }
1722            vecs[vecs_n + 0] = &full_header;
1723            vecs[vecs_n + 1] = remaining[0..max_block_size];
1724            r.hasher.update(vecs[vecs_n + 1]);
1725            vecs_n += 2;
1726            remaining = remaining[max_block_size..];
1727        }
1728
1729        // eos check required for empty block
1730        if (w.buffer.len - (remaining.len + preserved) < capacity or eos) {
1731            // A partial write is necessary to reclaim enough buffer space
1732            const block_size: u16 = @intCast(remaining.len);
1733            partial_header[0] = BlockHeader.int(.{ .final = eos, .kind = .stored });
1734            mem.writeInt(u16, partial_header[1..3], block_size, .little);
1735            mem.writeInt(u16, partial_header[3..5], ~block_size, .little);
1736
1737            if (vecs_n == vecs.len) {
1738                try r.output.writeVecAll(&vecs);
1739                vecs_n = 0;
1740            }
1741            vecs[vecs_n + 0] = &partial_header;
1742            vecs[vecs_n + 1] = remaining[0..block_size];
1743            r.hasher.update(vecs[vecs_n + 1]);
1744            vecs_n += 2;
1745            remaining = remaining[block_size..];
1746            assert(remaining.len == 0);
1747
1748            if (eos and r.hasher != .raw) {
                // The footer is written here instead of in `flush` so it can be included in the vector.
1750                var footer_w: Writer = .fixed(&footer_buf);
1751                r.hasher.writeFooter(&footer_w) catch unreachable;
1752                assert(footer_w.end != 0);
1753
1754                if (vecs_n == vecs.len) {
1755                    try r.output.writeVecAll(&vecs);
1756                    return r.output.writeAll(footer_w.buffered());
1757                } else {
1758                    vecs[vecs_n] = footer_w.buffered();
1759                    vecs_n += 1;
1760                }
1761            }
1762        }
1763
1764        try r.output.writeVecAll(vecs[0..vecs_n]);
1765        _ = w.consume(w.end - preserved - remaining.len);
1766    }
1767};
1768
1769test Raw {
1770    const data_buf = try std.testing.allocator.create([4 * 65536]u8);
1771    defer if (!builtin.fuzz) std.testing.allocator.destroy(data_buf);
1772    var prng: std.Random.DefaultPrng = .init(std.testing.random_seed);
1773    prng.random().bytes(data_buf);
1774    try std.testing.fuzz(data_buf, testFuzzedRawInput, .{});
1775}
1776
1777fn countVec(data: []const []const u8) usize {
1778    var bytes: usize = 0;
1779    for (data) |d| bytes += d.len;
1780    return bytes;
1781}
1782
1783fn testFuzzedRawInput(data_buf: *const [4 * 65536]u8, input: []const u8) !void {
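    // Consumes the store-block stream produced by `Raw`: it validates the container
    // header, walks each block header, hashes and counts the payloads, and records
    // the footer checksum/size for later comparison.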
1784    const HashedStoreWriter = struct {
1785        writer: Writer,
1786        state: enum {
1787            header,
1788            block_header,
1789            block_body,
1790            final_block_body,
1791            footer,
1792            end,
1793        },
1794        block_remaining: u16,
1795        container: flate.Container,
1796        data_hash: flate.Container.Hasher,
1797        data_size: usize,
1798        footer_hash: u32,
1799        footer_size: u32,
1800
1801        pub fn init(buf: []u8, container: flate.Container) @This() {
1802            return .{
1803                .writer = .{
1804                    .vtable = &.{
1805                        .drain = @This().drain,
1806                        .flush = @This().flush,
1807                    },
1808                    .buffer = buf,
1809                },
1810                .state = .header,
1811                .block_remaining = 0,
1812                .container = container,
1813                .data_hash = .init(container),
1814                .data_size = 0,
1815                .footer_hash = undefined,
1816                .footer_size = undefined,
1817            };
1818        }
1819
1820        /// Note that this implementation is somewhat dependent on the implementation of
        /// `Raw` by expecting headers / footers to be contiguous in data elements. It
1822        /// also expects the header to be the same as `flate.Container.header` and not
1823        /// for multiple streams to be concatenated.
1824        fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
1825            errdefer w.* = .failing;
1826            var h: *@This() = @fieldParentPtr("writer", w);
1827
1828            var rem_splat = splat;
1829            var rem_data = data;
1830            var rem_data_elem: []const u8 = w.buffered();
1831
1832            data_loop: while (true) {
1833                const wanted = switch (h.state) {
1834                    .header => h.container.headerSize(),
1835                    .block_header => 5,
1836                    .block_body, .final_block_body => h.block_remaining,
1837                    .footer => h.container.footerSize(),
1838                    .end => 1,
1839                };
1840
1841                if (wanted != 0) {
1842                    while (rem_data_elem.len == 0) {
1843                        rem_data_elem = rem_data[0];
1844                        if (rem_data.len != 1) {
1845                            rem_data = rem_data[1..];
1846                        } else {
1847                            if (rem_splat == 0) {
1848                                break :data_loop;
1849                            } else {
1850                                rem_splat -= 1;
1851                            }
1852                        }
1853                    }
1854                }
1855
1856                const bytes = Io.Limit.limited(wanted).sliceConst(rem_data_elem);
1857                rem_data_elem = rem_data_elem[bytes.len..];
1858
1859                switch (h.state) {
1860                    .header => {
1861                        if (bytes.len < wanted)
1862                            return error.WriteFailed; // header eos
1863                        if (!mem.eql(u8, bytes, h.container.header()))
1864                            return error.WriteFailed; // wrong header
1865                        h.state = .block_header;
1866                    },
1867                    .block_header => {
1868                        if (bytes.len < wanted)
1869                            return error.WriteFailed; // store block header eos
1870                        const header: BlockHeader = @bitCast(@as(u3, @truncate(bytes[0])));
1871                        if (header.kind != .stored)
1872                            return error.WriteFailed; // non-store block
1873                        const len = mem.readInt(u16, bytes[1..3], .little);
1874                        const nlen = mem.readInt(u16, bytes[3..5], .little);
1875                        if (nlen != ~len)
1876                            return error.WriteFailed; // wrong nlen
1877                        h.block_remaining = len;
1878                        h.state = if (!header.final) .block_body else .final_block_body;
1879                    },
1880                    .block_body, .final_block_body => {
1881                        h.data_hash.update(bytes);
1882                        h.data_size += bytes.len;
1883                        h.block_remaining -= @intCast(bytes.len);
1884                        if (h.block_remaining == 0) {
1885                            h.state = if (h.state != .final_block_body) .block_header else .footer;
1886                        }
1887                    },
1888                    .footer => {
1889                        if (bytes.len < wanted)
1890                            return error.WriteFailed; // footer eos
1891                        switch (h.container) {
1892                            .raw => {},
1893                            .gzip => {
1894                                h.footer_hash = mem.readInt(u32, bytes[0..4], .little);
1895                                h.footer_size = mem.readInt(u32, bytes[4..8], .little);
1896                            },
1897                            .zlib => {
1898                                h.footer_hash = mem.readInt(u32, bytes[0..4], .big);
1899                            },
1900                        }
1901                        h.state = .end;
1902                    },
1903                    .end => return error.WriteFailed, // data past end
1904                }
1905            }
1906
1907            w.end = 0;
1908            return Writer.countSplat(data, splat);
1909        }
1910
1911        fn flush(w: *Writer) Writer.Error!void {
1912            defer w.* = .failing; // Clears buffer even if state hasn't reached `end`
1913            _ = try @This().drain(w, &.{""}, 0);
1914        }
1915    };
1916
1917    var in: Io.Reader = .fixed(input);
1918    const opts: packed struct(u19) {
1919        container: PackedContainer,
1920        buf_len: u17,
1921    } = @bitCast(in.takeLeb128(u19) catch 0);
1922    var output: HashedStoreWriter = .init(&.{}, opts.container.val());
1923    var r_buf: [2 * 65536]u8 = undefined;
1924    var r: Raw = try .init(
1925        &output.writer,
1926        r_buf[0 .. opts.buf_len +% flate.max_window_len],
1927        opts.container.val(),
1928    );
1929
1930    var data_base: u18 = 0;
1931    var expected_hash: flate.Container.Hasher = .init(opts.container.val());
1932    var expected_size: u32 = 0;
1933    var vecs: [32][]const u8 = undefined;
1934    var vecs_n: usize = 0;
1935
1936    while (in.seek != in.end) {
1937        const VecInfo = packed struct(u58) {
1938            output: bool,
            /// If set, `data_len` and `splat` are reinterpreted as `preserve`
            /// and `capacity` respectively and `output` is treated as set.
1941            rebase: bool,
1942            block_aligning_len: bool,
1943            block_aligning_splat: bool,
1944            data_len: u18,
1945            splat: u18,
1946            data_off: u18,
1947        };
1948        var vec_info: VecInfo = @bitCast(in.takeLeb128(u58) catch |e| switch (e) {
1949            error.ReadFailed => unreachable,
1950            error.Overflow, error.EndOfStream => 0,
1951        });
1952
1953        {
1954            const buffered = r.writer.buffered().len + countVec(vecs[0..vecs_n]);
1955            const to_align = mem.alignForwardAnyAlign(usize, buffered, Raw.max_block_size) - buffered;
1956            assert((buffered + to_align) % Raw.max_block_size == 0);
1957
1958            if (vec_info.block_aligning_len) {
1959                vec_info.data_len = @intCast(to_align);
1960            } else if (vec_info.block_aligning_splat and vec_info.data_len != 0 and
1961                to_align % vec_info.data_len == 0)
1962            {
1963                vec_info.splat = @divExact(@as(u18, @intCast(to_align)), vec_info.data_len) -% 1;
1964            }
1965        }
1966
1967        var splat = if (vec_info.output and !vec_info.rebase) vec_info.splat +% 1 else 1;
1968        add_vec: {
1969            if (vec_info.rebase) break :add_vec;
1970            if (expected_size +| math.mulWide(u18, vec_info.data_len, splat) >
1971                10 * (1 << 16))
1972            {
1973                // Skip this vector to avoid this test taking too long.
                // 10 maximum-sized blocks is chosen as the limit since it is two more
1975                // than the maximum the implementation can output in one drain.
1976                splat = 1;
1977                break :add_vec;
1978            }
1979
1980            vecs[vecs_n] = data_buf[@min(
1981                data_base +% vec_info.data_off,
1982                data_buf.len - vec_info.data_len,
1983            )..][0..vec_info.data_len];
1984
1985            data_base +%= vec_info.data_len +% 3; // extra 3 to help catch aliasing bugs
1986
1987            for (0..splat) |_| expected_hash.update(vecs[vecs_n]);
1988            expected_size += @as(u32, @intCast(vecs[vecs_n].len)) * splat;
1989            vecs_n += 1;
1990        }
1991
1992        const want_drain = vecs_n == vecs.len or vec_info.output or vec_info.rebase or
1993            in.seek == in.end;
1994        if (want_drain and vecs_n != 0) {
1995            try r.writer.writeSplatAll(vecs[0..vecs_n], splat);
1996            vecs_n = 0;
1997        } else assert(splat == 1);
1998
1999        if (vec_info.rebase) {
2000            try r.writer.rebase(vec_info.data_len, @min(
2001                r.writer.buffer.len -| vec_info.data_len,
2002                vec_info.splat,
2003            ));
2004        }
2005    }
2006
2007    try r.writer.flush();
2008    try output.writer.flush();
2009
2010    try std.testing.expectEqual(.end, output.state);
2011    try std.testing.expectEqual(expected_size, output.data_size);
2012    switch (output.data_hash) {
2013        .raw => {},
2014        .gzip => |gz| {
2015            const expected_crc = expected_hash.gzip.crc.final();
2016            try std.testing.expectEqual(expected_crc, gz.crc.final());
2017            try std.testing.expectEqual(expected_crc, output.footer_hash);
2018            try std.testing.expectEqual(expected_size, output.footer_size);
2019        },
2020        .zlib => |zl| {
2021            const expected_adler = expected_hash.zlib.adler;
2022            try std.testing.expectEqual(expected_adler, zl.adler);
2023            try std.testing.expectEqual(expected_adler, output.footer_hash);
2024        },
2025    }
2026}
2027
/// Only performs Huffman coding of the data; does no match searching.
2029pub const Huffman = struct {
2030    writer: Writer,
2031    bit_writer: BitWriter,
2032    hasher: flate.Container.Hasher,
2033
2034    const max_tokens: u16 = 65535 - 1; // one is reserved for EOF
2035
2036    /// While there is no minimum buffer size, it is recommended
2037    /// to be at least `flate.max_window_len` to improve compression.
2038    ///
    /// It is asserted `output` has a capacity of more than 8 bytes.
2040    pub fn init(output: *Writer, buffer: []u8, container: flate.Container) Writer.Error!Huffman {
2041        assert(output.buffer.len > 8);
2042
2043        try output.writeAll(container.header());
2044        return .{
2045            .writer = .{
2046                .buffer = buffer,
2047                .vtable = &.{
2048                    .drain = Huffman.drain,
2049                    .flush = Huffman.flush,
2050                    .rebase = Huffman.rebase,
2051                },
2052            },
2053            .bit_writer = .init(output),
2054            .hasher = .init(container),
2055        };
2056    }
2057
2058    fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
2059        {
2060            //std.debug.print("drain {} (buffered)", .{w.buffered().len});
2061            //for (data) |d| std.debug.print("\n\t+ {}", .{d.len});
2062            //std.debug.print(" x {}\n\n", .{splat});
2063        }
2064
2065        const h: *Huffman = @fieldParentPtr("writer", w);
2066        const min_block = @min(w.buffer.len, max_tokens);
2067        const pattern = data[data.len - 1];
2068
2069        const data_bytes = Writer.countSplat(data, splat);
2070        const total_bytes = w.end + data_bytes;
2071        var rem_bytes = total_bytes;
2072        var rem_splat = splat;
2073        var rem_data = data;
2074        var rem_data_elem: []const u8 = w.buffered();
2075
2076        assert(rem_bytes > min_block);
2077        while (rem_bytes > min_block) { // not >= to allow `min_block` blocks to be marked as final
2078            // also, it handles the case of `min_block` being zero (no buffer)
2079            const block_size: u16 = @min(rem_bytes, max_tokens);
2080            rem_bytes -= block_size;
2081
2082            // Count frequencies
2083            comptime assert(max_tokens != 65535);
2084            var freqs: [257]u16 = @splat(0);
2085            freqs[256] = 1;
2086
2087            const start_splat = rem_splat;
2088            const start_data = rem_data;
2089            const start_data_elem = rem_data_elem;
2090
2091            var block_limit: Io.Limit = .limited(block_size);
2092            while (true) {
2093                const bytes = block_limit.sliceConst(rem_data_elem);
2094                const is_pattern = rem_splat != splat and bytes.len == pattern.len;
2095
2096                const mul = if (!is_pattern) 1 else @intFromEnum(block_limit) / pattern.len;
2097                assert(mul != 0);
2098                if (is_pattern) assert(mul <= rem_splat + 1); // one more for `rem_data`
2099
2100                for (bytes) |b| freqs[b] += @intCast(mul);
2101                rem_data_elem = rem_data_elem[bytes.len..];
2102                block_limit = block_limit.subtract(bytes.len * mul).?;
2103
2104                if (rem_data_elem.len == 0) {
2105                    rem_data_elem = rem_data[0];
2106                    if (rem_data.len != 1) {
2107                        rem_data = rem_data[1..];
2108                    } else if (rem_splat >= mul) {
2109                        // if the counter was not the pattern, `mul` is always one, otherwise,
2110                        // `mul` contains `rem_data`,  however one more needs subtracted anyways
2111                        // since the next pattern is also being taken.
2112                        rem_splat -= mul;
2113                    } else {
2114                        // All of `data` has been consumed.
2115                        assert(block_limit == .nothing);
2116                        assert(rem_bytes == 0);
2117                        // Since `rem_bytes` and `block_limit` are zero, these won't be used.
2118                        rem_data = undefined;
2119                        rem_data_elem = undefined;
2120                        rem_splat = undefined;
2121                    }
2122                }
2123                if (block_limit == .nothing) break;
2124            }
2125
2126            // Output block
2127            rem_splat = start_splat;
2128            rem_data = start_data;
2129            rem_data_elem = start_data_elem;
2130            block_limit = .limited(block_size);
2131
2132            var codes_buf: CodesBuf = .init;
2133            if (try h.outputHeader(&freqs, &codes_buf, block_size, false)) |table| {
2134                while (true) {
2135                    const bytes = block_limit.sliceConst(rem_data_elem);
2136                    rem_data_elem = rem_data_elem[bytes.len..];
2137                    block_limit = block_limit.subtract(bytes.len).?;
2138
2139                    h.hasher.update(bytes);
2140                    for (bytes) |b| {
2141                        try h.bit_writer.write(table.codes[b], table.bits[b]);
2142                    }
2143
2144                    if (rem_data_elem.len == 0) {
2145                        rem_data_elem = rem_data[0];
2146                        if (rem_data.len != 1) {
2147                            rem_data = rem_data[1..];
2148                        } else if (rem_splat != 0) {
2149                            rem_splat -= 1;
2150                        } else {
2151                            // All of `data` has been consumed.
2152                            assert(block_limit == .nothing);
2153                            assert(rem_bytes == 0);
2154                            // Since `rem_bytes` and `block_limit` are zero, these won't be used.
2155                            rem_data = undefined;
2156                            rem_data_elem = undefined;
2157                            rem_splat = undefined;
2158                        }
2159                    }
2160                    if (block_limit == .nothing) break;
2161                }
2162                try h.bit_writer.write(table.codes[256], table.bits[256]);
2163            } else while (true) {
2164                // Store block
2165
2166                // Write data that is not a full vector element
2167                const in_pattern = rem_splat != splat;
2168                const vec_elem_i, const in_data =
2169                    @subWithOverflow(data.len - (rem_data.len - @intFromBool(in_pattern)), 1);
2170                const is_elem = in_data == 0 and data[vec_elem_i].len == rem_data_elem.len;
2171
2172                if (!is_elem or rem_data_elem.len > @intFromEnum(block_limit)) {
2173                    block_limit = block_limit.subtract(rem_data_elem.len) orelse {
2174                        try h.bit_writer.output.writeAll(rem_data_elem[0..@intFromEnum(block_limit)]);
2175                        h.hasher.update(rem_data_elem[0..@intFromEnum(block_limit)]);
2176                        rem_data_elem = rem_data_elem[@intFromEnum(block_limit)..];
2177                        assert(rem_data_elem.len != 0);
2178                        break;
2179                    };
2180                    try h.bit_writer.output.writeAll(rem_data_elem);
2181                    h.hasher.update(rem_data_elem);
2182                } else {
2183                    // Put `rem_data_elem` back in `rem_data`
2184                    if (!in_pattern) {
2185                        rem_data = data[vec_elem_i..];
2186                    } else {
2187                        rem_splat += 1;
2188                    }
2189                }
2190                rem_data_elem = undefined; // it is always updated below
2191
2192                // Send through as much of the original vector as possible
2193                var vec_n: usize = 0;
2194                var vlimit = block_limit;
2195                const vec_splat = while (rem_data[vec_n..].len != 1) {
2196                    vlimit = vlimit.subtract(rem_data[vec_n].len) orelse break 1;
2197                    vec_n += 1;
2198                } else vec_splat: {
2199                    // For `pattern.len == 0`, the value of `vec_splat` does not matter.
2200                    const vec_splat = @intFromEnum(vlimit) / @max(1, pattern.len);
2201                    if (pattern.len != 0) assert(vec_splat <= rem_splat + 1);
2202                    vlimit = vlimit.subtract(pattern.len * vec_splat).?;
2203                    vec_n += 1;
2204                    break :vec_splat vec_splat;
2205                };
2206
2207                const n = if (vec_n != 0) n: {
2208                    assert(@intFromEnum(block_limit) - @intFromEnum(vlimit) ==
2209                        Writer.countSplat(rem_data[0..vec_n], vec_splat));
2210                    break :n try h.bit_writer.output.writeSplat(rem_data[0..vec_n], vec_splat);
2211                } else 0; // Still go into the case below to advance the vector
2212                block_limit = block_limit.subtract(n).?;
2213                var consumed: Io.Limit = .limited(n);
2214
2215                while (rem_data.len != 1) {
2216                    const elem = rem_data[0];
2217                    rem_data = rem_data[1..];
2218                    consumed = consumed.subtract(elem.len) orelse {
2219                        h.hasher.update(elem[0..@intFromEnum(consumed)]);
2220                        rem_data_elem = elem[@intFromEnum(consumed)..];
2221                        break;
2222                    };
2223                    h.hasher.update(elem);
2224                } else {
2225                    if (pattern.len == 0) {
2226                        // All of `data` has been consumed. However, the general
2227                        // case below does not work since it divides by zero.
2228                        assert(consumed == .nothing);
2229                        assert(block_limit == .nothing);
2230                        assert(rem_bytes == 0);
2231                        // Since `rem_bytes` and `block_limit` are zero, these won't be used.
2232                        rem_splat = undefined;
2233                        rem_data = undefined;
2234                        rem_data_elem = undefined;
2235                        break;
2236                    }
2237
2238                    const splatted = @intFromEnum(consumed) / pattern.len;
2239                    const partial = @intFromEnum(consumed) % pattern.len;
2240                    for (0..splatted) |_| h.hasher.update(pattern);
2241                    h.hasher.update(pattern[0..partial]);
2242
2243                    const taken_splat = splatted + 1;
2244                    if (rem_splat >= taken_splat) {
2245                        rem_splat -= taken_splat;
2246                        rem_data_elem = pattern[partial..];
2247                    } else {
2248                        // All of `data` has been consumed.
2249                        assert(partial == 0);
2250                        assert(block_limit == .nothing);
2251                        assert(rem_bytes == 0);
2252                        // Since `rem_bytes` and `block_limit` are zero, these won't be used.
2253                        rem_data = undefined;
2254                        rem_data_elem = undefined;
2255                        rem_splat = undefined;
2256                    }
2257                }
2258
2259                if (block_limit == .nothing) break;
2260            }
2261        }
2262
2263        if (rem_bytes > data_bytes) {
2264            assert(rem_bytes - data_bytes == rem_data_elem.len);
2265            assert(&rem_data_elem[0] == &w.buffer[total_bytes - rem_bytes]);
2266        }
2267        return w.consume(total_bytes - rem_bytes);
2268    }
2269
2270    fn flush(w: *Writer) Writer.Error!void {
2271        defer w.* = .failing;
2272        const h: *Huffman = @fieldParentPtr("writer", w);
2273        try Huffman.rebaseInner(w, 0, w.buffer.len, true);
2274        try h.bit_writer.output.rebase(0, 1);
2275        h.bit_writer.byteAlign();
2276        try h.hasher.writeFooter(h.bit_writer.output);
2277    }
2278
2279    fn rebase(w: *Writer, preserve: usize, capacity: usize) Writer.Error!void {
2280        errdefer w.* = .failing;
2281        try Huffman.rebaseInner(w, preserve, capacity, false);
2282    }
2283
2284    fn rebaseInner(w: *Writer, preserve: usize, capacity: usize, eos: bool) Writer.Error!void {
2285        const h: *Huffman = @fieldParentPtr("writer", w);
2286        assert(preserve + capacity <= w.buffer.len);
2287        if (eos) assert(capacity == w.buffer.len);
2288
2289        const preserved = @min(w.end, preserve);
2290        var remaining = w.buffer[0 .. w.end - preserved];
2291        while (remaining.len > max_tokens) { // not >= so there is always a block down below
2292            const bytes = remaining[0..max_tokens];
2293            remaining = remaining[max_tokens..];
2294            try h.outputBytes(bytes, false);
2295        }
2296
2297        // eos check required for empty block
2298        if (w.buffer.len - (remaining.len + preserved) < capacity or eos) {
2299            const bytes = remaining;
2300            remaining = &.{};
2301            try h.outputBytes(bytes, eos);
2302        }
2303
2304        _ = w.consume(w.end - preserved - remaining.len);
2305    }
2306
2307    fn outputBytes(h: *Huffman, bytes: []const u8, eos: bool) Writer.Error!void {
2308        comptime assert(max_tokens != 65535);
2309        assert(bytes.len <= max_tokens);
2310        var freqs: [257]u16 = @splat(0);
2311        freqs[256] = 1;
2312        for (bytes) |b| freqs[b] += 1;
2313        h.hasher.update(bytes);
2314
2315        var codes_buf: CodesBuf = .init;
2316        if (try h.outputHeader(&freqs, &codes_buf, @intCast(bytes.len), eos)) |table| {
2317            for (bytes) |b| {
2318                try h.bit_writer.write(table.codes[b], table.bits[b]);
2319            }
2320            try h.bit_writer.write(table.codes[256], table.bits[256]);
2321        } else {
2322            try h.bit_writer.output.writeAll(bytes);
2323        }
2324    }
2325
2326    const CodesBuf = struct {
2327        dyn_codes: [258]u16,
2328        dyn_bits: [258]u4,
2329
2330        pub const init: CodesBuf = .{
2331            .dyn_codes = @as([257]u16, undefined) ++ .{0},
2332            .dyn_bits = @as([257]u4, @splat(0)) ++ .{1},
2333        };
2334    };
2335
2336    /// Returns null if the block is stored.
2337    fn outputHeader(
2338        h: *Huffman,
2339        freqs: *const [257]u16,
2340        buf: *CodesBuf,
2341        bytes: u16,
2342        eos: bool,
2343    ) Writer.Error!?struct {
2344        codes: *const [257]u16,
2345        bits: *const [257]u4,
2346    } {
2347        assert(freqs[256] == 1);
2348        const dyn_codes_bitsize, _ = huffman.build(
2349            freqs,
2350            buf.dyn_codes[0..257],
2351            buf.dyn_bits[0..257],
2352            15,
2353            true,
2354        );
2355
2356        var clen_values: [258]u8 = undefined;
2357        var clen_extra: [258]u8 = undefined;
2358        var clen_freqs: [19]u16 = @splat(0);
2359        const clen_len, const clen_extra_bitsize = buildClen(
2360            &buf.dyn_bits,
2361            &clen_values,
2362            &clen_extra,
2363            &clen_freqs,
2364        );
2365
2366        var clen_codes: [19]u16 = undefined;
2367        var clen_bits: [19]u4 = @splat(0);
2368        const clen_codes_bitsize, _ = huffman.build(
2369            &clen_freqs,
2370            &clen_codes,
2371            &clen_bits,
2372            7,
2373            false,
2374        );
2375        const hclen = clenHlen(clen_freqs);
2376
2377        const dynamic_bitsize = @as(u32, 14) +
2378            (4 + @as(u6, hclen)) * 3 + clen_codes_bitsize + clen_extra_bitsize +
2379            dyn_codes_bitsize;
2380        const fixed_bitsize = n: {
2381            const freq7 = 1; // eos
2382            var freq9: u16 = 0;
2383            for (freqs[144..256]) |f| freq9 += f;
2384            const freq8: u16 = bytes - freq9;
2385            break :n @as(u32, freq7) * 7 + @as(u32, freq8) * 8 + @as(u32, freq9) * 9;
2386        };
2387        const stored_bitsize = n: {
2388            const stored_align_bits = -%(h.bit_writer.buffered_n +% 3);
2389            break :n stored_align_bits + @as(u32, 32) + @as(u32, bytes) * 8;
2390        };
2391
2392        //std.debug.print("@ {}{{{}}} ", .{ h.bit_writer.output.end, h.bit_writer.buffered_n });
2393        //std.debug.print("#{} -> s {} f {} d {}\n", .{ bytes, stored_bitsize, fixed_bitsize, dynamic_bitsize });
2394
2395        if (stored_bitsize <= @min(dynamic_bitsize, fixed_bitsize)) {
2396            try h.bit_writer.write(BlockHeader.int(.{ .kind = .stored, .final = eos }), 3);
2397            try h.bit_writer.output.rebase(0, 5);
2398            h.bit_writer.byteAlign();
2399            h.bit_writer.output.writeInt(u16, bytes, .little) catch unreachable;
2400            h.bit_writer.output.writeInt(u16, ~bytes, .little) catch unreachable;
2401            return null;
2402        }
2403
2404        if (fixed_bitsize <= dynamic_bitsize) {
2405            try h.bit_writer.write(BlockHeader.int(.{ .final = eos, .kind = .fixed }), 3);
2406            return .{
2407                .codes = token.fixed_lit_codes[0..257],
2408                .bits = token.fixed_lit_bits[0..257],
2409            };
2410        } else {
2411            try h.bit_writer.write(BlockHeader.Dynamic.int(.{
2412                .regular = .{ .final = eos, .kind = .dynamic },
2413                .hlit = 0,
2414                .hdist = 0,
2415                .hclen = hclen,
2416            }), 17);
2417            try h.bit_writer.writeClen(
2418                hclen,
2419                clen_values[0..clen_len],
2420                clen_extra[0..clen_len],
2421                clen_codes,
2422                clen_bits,
2423            );
2424            return .{ .codes = buf.dyn_codes[0..257], .bits = buf.dyn_bits[0..257] };
2425        }
2426    }
2427};
2428
2429test Huffman {
2430    const fbufs = try testingFreqBufs();
2431    defer if (!builtin.fuzz) std.testing.allocator.destroy(fbufs);
2432    try std.testing.fuzz(fbufs, testFuzzedHuffmanInput, .{});
2433}
2434
2435/// This function is derived from `testFuzzedRawInput` with a few changes for fuzzing `Huffman`.
2436fn testFuzzedHuffmanInput(fbufs: *const [2][65536]u8, input: []const u8) !void {
2437    var in: Io.Reader = .fixed(input);
2438    const opts: packed struct(u19) {
2439        container: PackedContainer,
2440        buf_len: u17,
2441    } = @bitCast(in.takeLeb128(u19) catch 0);
2442    var flate_buf: [2 * 65536]u8 = undefined;
2443    var flate_w: Writer = .fixed(&flate_buf);
2444    var h_buf: [2 * 65536]u8 = undefined;
2445    var h: Huffman = try .init(
2446        &flate_w,
2447        h_buf[0 .. opts.buf_len +% flate.max_window_len],
2448        opts.container.val(),
2449    );
2450
2451    var expected_hash: flate.Container.Hasher = .init(opts.container.val());
2452    var expected_size: u32 = 0;
2453    var vecs: [32][]const u8 = undefined;
2454    var vecs_n: usize = 0;
2455
2456    while (in.seek != in.end) {
2457        const VecInfo = packed struct(u55) {
2458            output: bool,
            /// If set, `data_len` and `splat` are reinterpreted as `preserve`
            /// and `capacity` respectively and `output` is treated as set.
2461            rebase: bool,
2462            block_aligning_len: bool,
2463            block_aligning_splat: bool,
2464            data_off_hi: u8,
2465            random_data: u1,
2466            data_len: u16,
2467            splat: u18,
2468            /// This is less useful as each value is part of the same gradient 'step'
2469            data_off_lo: u8,
2470        };
2471        var vec_info: VecInfo = @bitCast(in.takeLeb128(u55) catch |e| switch (e) {
2472            error.ReadFailed => unreachable,
2473            error.Overflow, error.EndOfStream => 0,
2474        });
2475
2476        {
2477            const buffered = h.writer.buffered().len + countVec(vecs[0..vecs_n]);
2478            const to_align = mem.alignForwardAnyAlign(usize, buffered, Huffman.max_tokens) - buffered;
2479            assert((buffered + to_align) % Huffman.max_tokens == 0);
2480
2481            if (vec_info.block_aligning_len) {
2482                vec_info.data_len = @intCast(to_align);
2483            } else if (vec_info.block_aligning_splat and vec_info.data_len != 0 and
2484                to_align % vec_info.data_len == 0)
2485            {
2486                vec_info.splat = @divExact(@as(u18, @intCast(to_align)), vec_info.data_len) -% 1;
2487            }
2488        }
2489
2490        var splat = if (vec_info.output and !vec_info.rebase) vec_info.splat +% 1 else 1;
2491        add_vec: {
2492            if (vec_info.rebase) break :add_vec;
2493            if (expected_size +| math.mulWide(u18, vec_info.data_len, splat) > 4 * (1 << 16)) {
2494                // Skip this vector to avoid this test taking too long.
2495                splat = 1;
2496                break :add_vec;
2497            }
2498
2499            const data_buf = &fbufs[vec_info.random_data];
2500            vecs[vecs_n] = data_buf[@min(
2501                (@as(u16, vec_info.data_off_hi) << 8) | vec_info.data_off_lo,
2502                data_buf.len - vec_info.data_len,
2503            )..][0..vec_info.data_len];
2504
2505            for (0..splat) |_| expected_hash.update(vecs[vecs_n]);
2506            expected_size += @as(u32, @intCast(vecs[vecs_n].len)) * splat;
2507            vecs_n += 1;
2508        }
2509
2510        const want_drain = vecs_n == vecs.len or vec_info.output or vec_info.rebase or
2511            in.seek == in.end;
2512        if (want_drain and vecs_n != 0) {
            const old_end = flate_w.end;
            var n = h.writer.buffered().len + Writer.countSplat(vecs[0..vecs_n], splat);
            const oos = h.writer.writeSplatAll(vecs[0..vecs_n], splat) == error.WriteFailed;
            n -= h.writer.buffered().len;
            const block_lim = math.divCeil(usize, n, Huffman.max_tokens) catch unreachable;
            const lim = old_end + 6 * block_lim + n; // 6 since block header may span two bytes
            if (flate_w.end > lim) return error.OverheadTooLarge;
2519            if (oos) return;
2520
2521            vecs_n = 0;
2522        } else assert(splat == 1);
2523
2524        if (vec_info.rebase) {
2525            const old_end = flate_w.end;
2526            var n = h.writer.buffered().len;
2527            const oos = h.writer.rebase(vec_info.data_len, @min(
2528                h.writer.buffer.len -| vec_info.data_len,
2529                vec_info.splat,
2530            )) == error.WriteFailed;
2531            n -= h.writer.buffered().len;
2532            const block_lim = math.divCeil(usize, n, Huffman.max_tokens) catch unreachable;
2533            const lim = old_end + 6 * block_lim + n; // 6 since block header may span two bytes
2534            if (flate_w.end > lim) return error.OverheadTooLarge;
2535            if (oos) return;
2536        }
2537    }
2538
2539    {
2540        const old_end = flate_w.end;
2541        const n = h.writer.buffered().len;
2542        const oos = h.writer.flush() == error.WriteFailed;
2543        assert(h.writer.buffered().len == 0);
2544        const block_lim = @max(1, math.divCeil(usize, n, Huffman.max_tokens) catch unreachable);
2545        const lim = old_end + 6 * block_lim + n + opts.container.val().footerSize();
2546        if (flate_w.end > lim) return error.OverheadTooLarge;
2547        if (oos) return;
2548    }
2549
2550    try testingCheckDecompressedMatches(flate_w.buffered(), expected_size, expected_hash);
2551}