1//! Allocates statically ~224K (128K lookup, 96K tokens).
2//!
3//! The source of an `error.WriteFailed` is always the backing writer. After an
4//! `error.WriteFailed`, the `.writer` becomes `.failing` and is unrecoverable.
5//! After a `flush`, the writer also becomes `.failing` since the stream has
6//! been finished. This behavior also applies to `Raw` and `Huffman`.
7
8// Implementation details:
9// A chained hash table is used to find matches. `drain` always preserves `flate.history_len`
10// bytes to use as a history and avoids tokenizing the final bytes since they can be part of
11// a longer match with unwritten bytes (unless it is a `flush`). The minimum match searched
12// for is of length `seq_bytes`. If a match is made, a longer match is also checked for at
13// the next byte (lazy matching) if the last match does not meet the `Options.lazy` threshold.
14//
15// Up to `block_tokens` tokens are accumulated in `buffered_tokens` and are emitted by
16// `writeBlock`, which determines the optimal block type and code frequencies.
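//
// Rough data flow: incoming bytes are buffered in `writer.buffer`; `drain` / `rebase`
// tokenize them into literals and (distance, length) matches using `lookup`; tokens are
// queued in `buffered_tokens`; and `writeBlock` encodes each full block through `bit_writer`.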
17
18const builtin = @import("builtin");
19const std = @import("std");
20const mem = std.mem;
21const math = std.math;
22const assert = std.debug.assert;
23const Io = std.Io;
24const Writer = Io.Writer;
25
26const Compress = @This();
27const token = @import("token.zig");
28const flate = @import("../flate.zig");
29
30/// Until #104 is implemented, a ?u15 takes 4 bytes, which is unacceptable
31/// as it doubles the size of this already massive structure.
32///
33/// Also, there are no `to` / `from` methods because LLVM 21 does not
34/// optimize away the conversion from and to `?u15`.
35const PackedOptionalU15 = packed struct(u16) {
36 value: u15,
37 is_null: bool,
38
39 pub fn int(p: PackedOptionalU15) u16 {
40 return @bitCast(p);
41 }
42
43 pub const null_bit: PackedOptionalU15 = .{ .value = 0, .is_null = true };
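
    // Note: `null_bit.int()` is 0x8000, which equals `flate.history_len`; `matchAndAddHash`
    // relies on this to test for null and an out-of-window distance with a single mask.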
44};
45
46/// After `flush` is called, all vtable calls will result in `error.WriteFailed`.
47writer: Writer,
48has_history: bool,
49bit_writer: BitWriter,
50buffered_tokens: struct {
51 /// List of `TokenBufferEntryHeader`s and their trailing data.
52 list: [@as(usize, block_tokens) * 3]u8,
53 pos: u32,
54 n: u16,
55 lit_freqs: [286]u16,
56 dist_freqs: [30]u16,
57
58 pub const empty: @This() = .{
59 .list = undefined,
60 .pos = 0,
61 .n = 0,
62 .lit_freqs = @splat(0),
63 .dist_freqs = @splat(0),
64 };
65},
66lookup: struct {
67 /// Indexes are the hashes of four-bytes sequences.
68 ///
69 /// Values are the positions in `chain` of the previous four bytes with the same hash.
70 head: [1 << lookup_hash_bits]PackedOptionalU15,
71 /// Values are the non-zero number of bytes backwards in the history with the same hash.
72 ///
73 /// The relationship of chain indexes and bytes relative to the latest history byte is
74 /// `chain_pos -% chain_index = history_index`.
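    /// For example, with `chain_pos == 100`, the entry at chain index 97 refers to the
    /// byte three positions back from the newest history byte.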
75 chain: [32768]PackedOptionalU15,
76 /// The index in `chain` which is of the newest byte of the history.
77 chain_pos: u15,
78},
79container: flate.Container,
80hasher: flate.Container.Hasher,
81opts: Options,
82
83const BitWriter = struct {
84 output: *Writer,
85 buffered: u7,
86 buffered_n: u3,
87
88 pub fn init(w: *Writer) BitWriter {
89 return .{
90 .output = w,
91 .buffered = 0,
92 .buffered_n = 0,
93 };
94 }
95
96 /// Asserts `bits` is zero-extended
97 pub fn write(b: *BitWriter, bits: u56, n: u6) Writer.Error!void {
98 assert(@as(u8, b.buffered) >> b.buffered_n == 0);
99 assert(@as(u57, bits) >> n == 0); // n may be 56 so u57 is needed
100 const combined = @shlExact(@as(u64, bits), b.buffered_n) | b.buffered;
101 const combined_bits = @as(u6, b.buffered_n) + n;
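        // e.g. with 3 bits already buffered and n == 12, `combined_bits` is 15: one full
        // byte is committed to `output` below and the remaining 7 bits stay buffered.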
102
103 const out = try b.output.writableSliceGreedy(8);
104 mem.writeInt(u64, out[0..8], combined, .little);
105 b.output.advance(combined_bits / 8);
106
107 b.buffered_n = @truncate(combined_bits);
108 b.buffered = @intCast(combined >> (combined_bits - b.buffered_n));
109 }
110
111    /// Asserts one byte can be written to `b.output` without rebasing.
112 pub fn byteAlign(b: *BitWriter) void {
113 b.output.unusedCapacitySlice()[0] = b.buffered;
114 b.output.advance(@intFromBool(b.buffered_n != 0));
115 b.buffered = 0;
116 b.buffered_n = 0;
117 }
118
119 pub fn writeClen(
120 b: *BitWriter,
121 hclen: u4,
122 clen_values: []u8,
123 clen_extra: []u8,
124 clen_codes: [19]u16,
125 clen_bits: [19]u4,
126 ) Writer.Error!void {
127        // Write the first four clen entries separately since they are always present,
128        // and writing all of the entries in one `write` call would take too many bits.
129 try b.write(clen_bits[token.codegen_order[0]] |
130 @shlExact(@as(u6, clen_bits[token.codegen_order[1]]), 3) |
131 @shlExact(@as(u9, clen_bits[token.codegen_order[2]]), 6) |
132 @shlExact(@as(u12, clen_bits[token.codegen_order[3]]), 9), 12);
133
134 var i = hclen;
135 var clen_bits_table: u45 = 0;
136 while (i != 0) {
137 i -= 1;
138 clen_bits_table <<= 3;
139 clen_bits_table |= clen_bits[token.codegen_order[4..][i]];
140 }
141 try b.write(clen_bits_table, @as(u6, hclen) * 3);
142
143 for (clen_values, clen_extra) |value, extra| {
144 try b.write(
145 clen_codes[value] | @shlExact(@as(u16, extra), clen_bits[value]),
146 clen_bits[value] + @as(u3, switch (value) {
147 0...15 => 0,
148 16 => 2,
149 17 => 3,
150 18 => 7,
151 else => unreachable,
152 }),
153 );
154 }
155 }
156};
157
158/// Number of tokens to accumulate before outputting them as a block.
159/// The maximum value is `math.maxInt(u16) - 1` since one token is reserved for end-of-block.
160const block_tokens: u16 = 1 << 15;
161const lookup_hash_bits = 15;
162const Hash = u16; // `u[lookup_hash_bits]` is not used due to worse optimization (with LLVM 21)
163const seq_bytes = 3; // not intended to be changed
164const Seq = std.meta.Int(.unsigned, seq_bytes * 8);
165
166const TokenBufferEntryHeader = packed struct(u16) {
167 kind: enum(u1) {
168 /// Followed by non-zero `data` byte literals.
169 bytes,
170 /// Followed by the length as a byte
171 match,
172 },
173 data: u15,
174};
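
// Entry layout in `buffered_tokens.list`: a `.bytes` entry is its 2-byte header (count in
// `data`) followed by that many literal bytes; a `.match` entry is its 2-byte header
// (distance in `data`) followed by one length byte, both as passed to `outputMatch`.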
175
176const BlockHeader = packed struct(u3) {
177 final: bool,
178 kind: enum(u2) { stored, fixed, dynamic, _ },
179
180 pub fn int(h: BlockHeader) u3 {
181 return @bitCast(h);
182 }
183
184 pub const Dynamic = packed struct(u17) {
185 regular: BlockHeader,
186 hlit: u5,
187 hdist: u5,
188 hclen: u4,
189
190 pub fn int(h: Dynamic) u17 {
191 return @bitCast(h);
192 }
193 };
194};
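
// Note: the packed layouts above follow the RFC 1951 bit order (BFINAL, BTYPE, and for
// dynamic blocks HLIT, HDIST, HCLEN), which `BitWriter.write` emits least significant bit first.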
195
196fn outputMatch(c: *Compress, dist: u15, len: u8) Writer.Error!void {
197 // This must come first. Instead of ensuring a full block is never left buffered,
198    // draining it is deferred so the end of stream can still be indicated.
199 if (c.buffered_tokens.n == block_tokens) {
200 @branchHint(.unlikely); // LLVM 21 optimizes this branch as the more likely without
201 try c.writeBlock(false);
202 }
203 const header: TokenBufferEntryHeader = .{ .kind = .match, .data = dist };
204 c.buffered_tokens.list[c.buffered_tokens.pos..][0..2].* = @bitCast(header);
205 c.buffered_tokens.list[c.buffered_tokens.pos + 2] = len;
206 c.buffered_tokens.pos += 3;
207 c.buffered_tokens.n += 1;
208
209 c.buffered_tokens.lit_freqs[@as(usize, 257) + token.LenCode.fromVal(len).toInt()] += 1;
210 c.buffered_tokens.dist_freqs[token.DistCode.fromVal(dist).toInt()] += 1;
211}
212
213fn outputBytes(c: *Compress, bytes: []const u8) Writer.Error!void {
214 var remaining = bytes;
215 while (remaining.len != 0) {
216 if (c.buffered_tokens.n == block_tokens) {
217 @branchHint(.unlikely); // LLVM 21 optimizes this branch as the more likely without
218 try c.writeBlock(false);
219 }
220
221 const n = @min(remaining.len, block_tokens - c.buffered_tokens.n, math.maxInt(u15));
222 assert(n != 0);
223 const header: TokenBufferEntryHeader = .{ .kind = .bytes, .data = n };
224 c.buffered_tokens.list[c.buffered_tokens.pos..][0..2].* = @bitCast(header);
225 @memcpy(c.buffered_tokens.list[c.buffered_tokens.pos + 2 ..][0..n], remaining[0..n]);
226 c.buffered_tokens.pos += @as(u32, 2) + n;
227 c.buffered_tokens.n += n;
228
229 for (remaining[0..n]) |b| {
230 c.buffered_tokens.lit_freqs[b] += 1;
231 }
232 remaining = remaining[n..];
233 }
234}
235
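// Fibonacci-style multiplicative hash: scale by ~2^32 / phi and keep the top
// `lookup_hash_bits` bits of the product.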
236fn hash(x: u32) Hash {
237 return @intCast((x *% 0x9E3779B1) >> (32 - lookup_hash_bits));
238}
239
240/// Trades between speed and compression size.
241///
242/// Default parameters are [taken from zlib]
243/// (https://github.com/madler/zlib/blob/v1.3.1/deflate.c#L112)
244pub const Options = struct {
245 /// Perform less lookups when a match of at least this length has been found.
246 good: u16,
247 /// Stop when a match of at least this length has been found.
248 nice: u16,
249 /// Don't attempt a lazy match find when a match of at least this length has been found.
250 lazy: u16,
251 /// Check this many previous locations with the same hash for longer matches.
252 chain: u16,
253
254 // zig fmt: off
255 pub const level_1: Options = .{ .good = 4, .nice = 8, .lazy = 0, .chain = 4 };
256 pub const level_2: Options = .{ .good = 4, .nice = 16, .lazy = 0, .chain = 8 };
257 pub const level_3: Options = .{ .good = 4, .nice = 32, .lazy = 0, .chain = 32 };
258 pub const level_4: Options = .{ .good = 4, .nice = 16, .lazy = 4, .chain = 16 };
259 pub const level_5: Options = .{ .good = 8, .nice = 32, .lazy = 16, .chain = 32 };
260 pub const level_6: Options = .{ .good = 8, .nice = 128, .lazy = 16, .chain = 128 };
261 pub const level_7: Options = .{ .good = 8, .nice = 128, .lazy = 32, .chain = 256 };
262 pub const level_8: Options = .{ .good = 32, .nice = 258, .lazy = 128, .chain = 1024 };
263 pub const level_9: Options = .{ .good = 32, .nice = 258, .lazy = 258, .chain = 4096 };
264 // zig fmt: on
265 pub const fastest = level_1;
266 pub const default = level_6;
267 pub const best = level_9;
268};
269
270/// It is asserted `buffer` is at least `flate.max_window_len` bytes.
271/// It is asserted `output` has a capacity of more than 8 bytes.
272pub fn init(
273 output: *Writer,
274 buffer: []u8,
275 container: flate.Container,
276 opts: Options,
277) Writer.Error!Compress {
278 assert(output.buffer.len > 8);
279 assert(buffer.len >= flate.max_window_len);
280
281 // note that disallowing some of these simplifies matching logic
282    assert(opts.chain != 0); // use `Huffman`; disallowing this simplifies matching
283 assert(opts.good >= 3 and opts.nice >= 3); // a match will (usually) not be found
284 assert(opts.good <= 258 and opts.nice <= 258); // a longer match will not be found
285 assert(opts.lazy <= opts.nice); // a longer match will (usually) not be found
286    if (opts.good <= opts.lazy) assert(opts.chain >= 1 << 2); // chain could otherwise be reduced to zero
287
288 try output.writeAll(container.header());
289 return .{
290 .writer = .{
291 .buffer = buffer,
292 .vtable = &.{
293 .drain = drain,
294 .flush = flush,
295 .rebase = rebase,
296 },
297 },
298 .has_history = false,
299 .bit_writer = .init(output),
300 .buffered_tokens = .empty,
301 .lookup = .{
302            // init `value` to max so the bytes form an all-0xff pattern
303 .head = @splat(.{ .value = math.maxInt(u15), .is_null = true }),
304 .chain = undefined,
305 .chain_pos = math.maxInt(u15),
306 },
307 .container = container,
308 .opts = opts,
309 .hasher = .init(container),
310 };
311}
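
// A minimal usage sketch (not exercised by the fuzz tests below): compress a short input
// into a fixed output buffer using a raw container and the default options.
test "Compress usage sketch" {
    var out_buf: [1024]u8 = undefined;
    var out: Writer = .fixed(&out_buf);
    var window_buf: [flate.max_window_len]u8 = undefined;
    var c = try Compress.init(&out, &window_buf, .raw, .default);
    try c.writer.writeAll("a minimal usage sketch, repeated: a minimal usage sketch");
    try c.writer.flush(); // finishes the stream; `c.writer` is unusable afterwards
    // `out.buffered()` now holds the finished deflate stream.
    try std.testing.expect(out.buffered().len != 0);
}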
312
313fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
314 errdefer w.* = .failing;
315    // There may not have been enough space in the buffer and the write was sent directly here.
316 // However, it is required that all data goes through the buffer to keep a history.
317 //
318    // Additionally, keeping the buffer full ensures that a full history is always
319    // available afterwards.
320 const data_n = w.buffer.len - w.end;
321 _ = w.fixedDrain(data, splat) catch {};
322 assert(w.end == w.buffer.len);
323 try rebaseInner(w, 0, 1, false);
324 return data_n;
325}
326
327fn flush(w: *Writer) Writer.Error!void {
328 defer w.* = .failing;
329 const c: *Compress = @fieldParentPtr("writer", w);
330 try rebaseInner(w, 0, w.buffer.len - flate.history_len, true);
331 try c.bit_writer.output.rebase(0, 1);
332 c.bit_writer.byteAlign();
333 try c.hasher.writeFooter(c.bit_writer.output);
334}
335
336fn rebase(w: *Writer, preserve: usize, capacity: usize) Writer.Error!void {
337 return rebaseInner(w, preserve, capacity, false);
338}
339
340pub const rebase_min_preserve = flate.history_len;
341pub const rebase_reserved_capacity = (token.max_length + 1) + seq_bytes;
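// Roughly: room for the longest possible match plus the lazy-lookahead byte, plus the
// `seq_bytes` read-ahead used when hashing, so tokenization never indexes past `w.end`.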
342
343fn rebaseInner(w: *Writer, preserve: usize, capacity: usize, eos: bool) Writer.Error!void {
344 if (!eos) {
345 assert(@max(preserve, rebase_min_preserve) + (capacity + rebase_reserved_capacity) <= w.buffer.len);
346        assert(w.end >= flate.history_len + rebase_reserved_capacity); // If this fails, the assert
347        // above should have failed first, since rebase is only called when `capacity` is unavailable.
348        // This assertion is important because a full history is required at the end.
349 } else {
350 assert(preserve == 0 and capacity == w.buffer.len - flate.history_len);
351 }
352
353 const c: *Compress = @fieldParentPtr("writer", w);
354 const buffered = w.buffered();
355
356 const start = @as(usize, flate.history_len) * @intFromBool(c.has_history);
357 const lit_end: usize = if (!eos)
358 buffered.len - rebase_reserved_capacity - (preserve -| flate.history_len)
359 else
360 buffered.len -| (seq_bytes - 1);
361
362 var i = start;
363 var last_unmatched = i;
364 // Read from `w.buffer` instead of `buffered` since the latter may not
365 // have enough bytes. If this is the case, this variable is not used.
366 var seq: Seq = mem.readInt(
367 std.meta.Int(.unsigned, (seq_bytes - 1) * 8),
368 w.buffer[i..][0 .. seq_bytes - 1],
369 .big,
370 );
371 if (buffered[i..].len < seq_bytes - 1) {
372 @branchHint(.unlikely);
373 assert(eos);
374 seq = undefined;
375 assert(i >= lit_end);
376 }
377
378 while (i < lit_end) {
379 var match_start = i;
380 seq <<= 8;
381 seq |= buffered[i + (seq_bytes - 1)];
382 var match = c.matchAndAddHash(i, hash(seq), token.min_length - 1, c.opts.chain, c.opts.good);
383 i += 1;
384 if (match.len < token.min_length) continue;
385
386 var match_unadded = match.len - 1;
387 lazy: {
388 if (match.len >= c.opts.lazy) break :lazy;
389 if (match.len >= c.writer.buffered()[i..].len) {
390 @branchHint(.unlikely); // Only end of stream
391 break :lazy;
392 }
393
394 var chain = c.opts.chain;
395 var good = c.opts.good;
396 if (match.len >= good) {
397 chain >>= 2;
398 good = math.maxInt(u8); // Reduce only once
399 }
400
401 seq <<= 8;
402 seq |= buffered[i + (seq_bytes - 1)];
403 const lazy = c.matchAndAddHash(i, hash(seq), match.len, chain, good);
404 match_unadded -= 1;
405 i += 1;
406
407 if (lazy.len > match.len) {
408 match_start += 1;
409 match = lazy;
410 match_unadded = match.len - 1;
411 }
412 }
413
414 assert(i + match_unadded == match_start + match.len);
415 assert(mem.eql(
416 u8,
417 buffered[match_start..][0..match.len],
418 buffered[match_start - 1 - match.dist ..][0..match.len],
419 )); // This assert also seems to help codegen.
420
421 try c.outputBytes(buffered[last_unmatched..match_start]);
422 try c.outputMatch(@intCast(match.dist), @intCast(match.len - 3));
423
424 last_unmatched = match_start + match.len;
425 if (last_unmatched + seq_bytes >= w.end) {
426 @branchHint(.unlikely);
427 assert(eos);
428 i = undefined;
429 break;
430 }
431
432 while (true) {
433 seq <<= 8;
434 seq |= buffered[i + (seq_bytes - 1)];
435 _ = c.addHash(i, hash(seq));
436 i += 1;
437
438 match_unadded -= 1;
439 if (match_unadded == 0) break;
440 }
441 assert(i == match_start + match.len);
442 }
443
444 if (eos) {
445 i = undefined; // (from match hashing logic)
446 try c.outputBytes(buffered[last_unmatched..]);
447 c.hasher.update(buffered[start..]);
448 try c.writeBlock(true);
449 return;
450 }
451
452 try c.outputBytes(buffered[last_unmatched..i]);
453 c.hasher.update(buffered[start..i]);
454
455 const preserved = buffered[i - flate.history_len ..];
456 assert(preserved.len > @max(rebase_min_preserve, preserve));
457 @memmove(w.buffer[0..preserved.len], preserved);
458 w.end = preserved.len;
459 c.has_history = true;
460}
461
462fn addHash(c: *Compress, i: usize, h: Hash) void {
463 assert(h == hash(mem.readInt(Seq, c.writer.buffer[i..][0..seq_bytes], .big)));
464
465 const l = &c.lookup;
466 l.chain_pos +%= 1;
467
468    // Equivalent to the below, however LLVM 21 does not optimize `@subWithOverflow` well at all.
469 // const replaced_i, const no_replace = @subWithOverflow(i, flate.history_len);
470 // if (no_replace == 0) {
471 if (i >= flate.history_len) {
472 @branchHint(.likely);
473 const replaced_i = i - flate.history_len;
474        // The following is the same as the below except it uses a 32-bit load to help optimization
475 // const replaced_seq = mem.readInt(Seq, c.writer.buffer[replaced_i..][0..seq_bytes], .big);
476 comptime assert(@sizeOf(Seq) <= @sizeOf(u32));
477 const replaced_u32 = mem.readInt(u32, c.writer.buffered()[replaced_i..][0..4], .big);
478 const replaced_seq: Seq = @intCast(replaced_u32 >> (32 - @bitSizeOf(Seq)));
479
480 const replaced_h = hash(replaced_seq);
481        // The following is equivalent to the below, which LLVM 21 doesn't optimize well.
482 // l.head[replaced_h].is_null = l.head[replaced_h].is_null or
483 // l.head[replaced_h].int() == l.chain_pos;
484 const empty_head = l.head[replaced_h].int() == l.chain_pos;
485 const null_flag = PackedOptionalU15.int(.{ .is_null = empty_head, .value = 0 });
486 l.head[replaced_h] = @bitCast(l.head[replaced_h].int() | null_flag);
487 }
488
489 const prev_chain_index = l.head[h];
490 l.chain[l.chain_pos] = @bitCast((l.chain_pos -% prev_chain_index.value) |
491 (prev_chain_index.int() & PackedOptionalU15.null_bit.int())); // Preserves null
492 l.head[h] = .{ .value = l.chain_pos, .is_null = false };
493}
494
495/// If the match is shorter, the returned value can be any value `<= old`.
496fn betterMatchLen(old: u16, prev: []const u8, bytes: []const u8) u16 {
497 assert(old < @min(bytes.len, token.max_length));
498 assert(prev.len >= bytes.len);
499 assert(bytes.len >= token.min_length);
500
501 var i: u16 = 0;
502 const Block = std.meta.Int(.unsigned, @min(math.divCeil(
503 comptime_int,
504 math.ceilPowerOfTwoAssert(usize, @bitSizeOf(usize)),
505 8,
506 ) catch unreachable, 256) * 8);
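    // On 64-bit targets `Block` is a `u64`: 8 bytes are compared at a time and the first
    // differing byte is located with `@ctz` of the xor.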
507
508 if (bytes.len < token.max_length) {
509 @branchHint(.unlikely); // Only end of stream
510
511 while (bytes[i..].len >= @sizeOf(Block)) {
512 const a = mem.readInt(Block, prev[i..][0..@sizeOf(Block)], .little);
513 const b = mem.readInt(Block, bytes[i..][0..@sizeOf(Block)], .little);
514 const diff = a ^ b;
515 if (diff != 0) {
516 @branchHint(.likely);
517 i += @ctz(diff) / 8;
518 return i;
519 }
520 i += @sizeOf(Block);
521 }
522
523 while (i != bytes.len and prev[i] == bytes[i]) {
524 i += 1;
525 }
526 assert(i < token.max_length);
527 return i;
528 }
529
530 if (old >= @sizeOf(Block)) {
531 // Check that a longer end is present, otherwise the match is always worse
532 const a = mem.readInt(Block, prev[old + 1 - @sizeOf(Block) ..][0..@sizeOf(Block)], .little);
533 const b = mem.readInt(Block, bytes[old + 1 - @sizeOf(Block) ..][0..@sizeOf(Block)], .little);
534 if (a != b) return i;
535 }
536
537 while (true) {
538 const a = mem.readInt(Block, prev[i..][0..@sizeOf(Block)], .little);
539 const b = mem.readInt(Block, bytes[i..][0..@sizeOf(Block)], .little);
540 const diff = a ^ b;
541 if (diff != 0) {
542 i += @ctz(diff) / 8;
543 return i;
544 }
545 i += @sizeOf(Block);
546 if (i == 256) break;
547 }
548
549 const a = mem.readInt(u16, prev[i..][0..2], .little);
550 const b = mem.readInt(u16, bytes[i..][0..2], .little);
551 const diff = a ^ b;
552 i += @ctz(diff) / 8;
553 assert(i <= token.max_length);
554 return i;
555}
556
557test betterMatchLen {
558 try std.testing.fuzz({}, testFuzzedMatchLen, .{});
559}
560
561fn testFuzzedMatchLen(_: void, input: []const u8) !void {
562 @disableInstrumentation();
563 var r: Io.Reader = .fixed(input);
564 var buf: [1024]u8 = undefined;
565 var w: Writer = .fixed(&buf);
566 var old = r.takeLeb128(u9) catch 0;
567 var bytes_off = @max(1, r.takeLeb128(u10) catch 258);
568 const prev_back = @max(1, r.takeLeb128(u10) catch 258);
569
570 while (r.takeByte()) |byte| {
571 const op: packed struct(u8) {
572 kind: enum(u2) { splat, copy, insert_imm, insert },
573 imm: u6,
574
575 pub fn immOrByte(op_s: @This(), r_s: *Io.Reader) usize {
576 return if (op_s.imm == 0) op_s.imm else @as(usize, r_s.takeByte() catch 0) + 64;
577 }
578 } = @bitCast(byte);
579 (switch (op.kind) {
580 .splat => w.splatByteAll(r.takeByte() catch 0, op.immOrByte(&r)),
581 .copy => write: {
582 const start = w.buffered().len -| op.immOrByte(&r);
583 const len = @min(w.buffered().len - start, r.takeByte() catch 3);
584 break :write w.writeAll(w.buffered()[start..][0..len]);
585 },
586 .insert_imm => w.writeByte(op.imm),
587 .insert => w.writeAll(r.take(
588 @min(r.bufferedLen(), @as(usize, op.imm) + 1),
589 ) catch unreachable),
590 }) catch break;
591 } else |_| {}
592
593 w.splatByteAll(0, (1 + 3) -| w.buffered().len) catch unreachable;
594 bytes_off = @min(bytes_off, @as(u10, @intCast(w.buffered().len - 3)));
595 const prev_off = bytes_off -| prev_back;
596 assert(prev_off < bytes_off);
597 const prev = w.buffered()[prev_off..];
598 const bytes = w.buffered()[bytes_off..];
599 old = @min(old, bytes.len - 1, token.max_length - 1);
600
601    const diff_index = mem.indexOfDiff(u8, prev, bytes).?; // unwrap since the lengths are not the same
602 const expected_len = @min(diff_index, 258);
603 errdefer std.debug.print(
604 \\prev : '{any}'
605 \\bytes: '{any}'
606 \\old : {}
607 \\expected: {?}
608 \\actual : {}
609 ++ "\n", .{
610 prev, bytes, old,
611 if (old < expected_len) expected_len else null, betterMatchLen(old, prev, bytes),
612 });
613 if (old < expected_len) {
614 try std.testing.expectEqual(expected_len, betterMatchLen(old, prev, bytes));
615 } else {
616 try std.testing.expect(betterMatchLen(old, prev, bytes) <= old);
617 }
618}
619
620fn matchAndAddHash(c: *Compress, i: usize, h: Hash, gt: u16, max_chain: u16, good_: u16) struct {
621 dist: u16,
622 len: u16,
623} {
624 const l = &c.lookup;
625 const buffered = c.writer.buffered();
626
627 var chain_limit = max_chain;
628 var best_dist: u16 = undefined;
629 var best_len = gt;
630 const nice = @min(c.opts.nice, buffered[i..].len);
631 var good = good_;
632
633 search: {
634 if (l.head[h].is_null) break :search;
635 // Actually a u15, but LLVM 21 does not optimize that as well (it truncates it each use).
636 var dist: u16 = l.chain_pos -% l.head[h].value;
637 while (true) {
638 chain_limit -= 1;
639
640 const match_len = betterMatchLen(best_len, buffered[i - 1 - dist ..], buffered[i..]);
641 if (match_len > best_len) {
642 best_dist = dist;
643 best_len = match_len;
644 if (best_len >= nice) break;
645 if (best_len >= good) {
646 chain_limit >>= 2;
647 good = math.maxInt(u8); // Reduce only once
648 }
649 }
650
651 if (chain_limit == 0) break;
652 const next_chain_index = l.chain_pos -% @as(u15, @intCast(dist));
653            // Equivalent to the below; however, LLVM 21 optimizes the below worse.
654 // if (l.chain[next_chain_index].is_null) break;
655 // dist, const out_of_window = @addWithOverflow(dist, l.chain[next_chain_index].value);
656 // if (out_of_window == 1) break;
657 dist +%= l.chain[next_chain_index].int(); // wrapping for potential null bit
658 comptime assert(flate.history_len == PackedOptionalU15.int(.null_bit));
659 // Also, doing >= flate.history_len gives worse codegen with LLVM 21.
660 if ((dist | l.chain[next_chain_index].int()) & flate.history_len != 0) break;
661 }
662 }
663
664 c.addHash(i, h);
665 return .{ .dist = best_dist, .len = best_len };
666}
667
668fn clenHlen(freqs: [19]u16) u4 {
669 // Note that the first four codes (16, 17, 18, and 0) are always present.
670 if (builtin.mode != .ReleaseSmall and (std.simd.suggestVectorLength(u16) orelse 1) >= 8) {
671 const V = @Vector(16, u16);
672 const hlen_mul: V = comptime m: {
673 var hlen_mul: [16]u16 = undefined;
674 for (token.codegen_order[3..], 0..) |i, hlen| {
675 hlen_mul[i] = hlen;
676 }
677 break :m hlen_mul;
678 };
679 const encoded = freqs[0..16].* != @as(V, @splat(0));
680 return @intCast(@reduce(.Max, @intFromBool(encoded) * hlen_mul));
681 } else {
682 var max: u4 = 0;
683 for (token.codegen_order[4..], 1..) |i, len| {
684 max = if (freqs[i] == 0) max else @intCast(len);
685 }
686 return max;
687 }
688}
689
690test clenHlen {
691 var freqs: [19]u16 = @splat(0);
692 try std.testing.expectEqual(0, clenHlen(freqs));
693 for (token.codegen_order, 1..) |i, len| {
694 freqs[i] = 1;
695 try std.testing.expectEqual(len -| 4, clenHlen(freqs));
696 freqs[i] = 0;
697 }
698}
699
700/// Returns the number of values followed by the bitsize of the extra bits.
701fn buildClen(
702 dyn_bits: []const u4,
703 out_values: []u8,
704 out_extra: []u8,
705 out_freqs: *[19]u16,
706) struct { u16, u16 } {
707 assert(dyn_bits.len <= out_values.len);
708 assert(out_values.len == out_extra.len);
709
710 var len: u16 = 0;
711 var extra_bitsize: u16 = 0;
712
713 var remaining_bits = dyn_bits;
714 var prev: u4 = 0;
715 while (true) {
716 const b = remaining_bits[0];
717 const n_max = @min(@as(u8, if (b != 0)
718 if (b != prev) 1 else 6
719 else
720 138), remaining_bits.len);
721 prev = b;
722
723 var n: u8 = 0;
724 while (true) {
725 remaining_bits = remaining_bits[1..];
726 n += 1;
727 if (n == n_max or remaining_bits[0] != b) break;
728 }
729 const code, const extra, const xsize = switch (n) {
730 0 => unreachable,
731 1...2 => .{ b, 0, 0 },
732 3...10 => .{
733 @as(u8, 16) + @intFromBool(b == 0),
734 n - 3,
735 @as(u8, 2) + @intFromBool(b == 0),
736 },
737 11...138 => .{ 18, n - 11, 7 },
738 else => unreachable,
739 };
740 while (true) {
741 out_values[len] = code;
742 out_extra[len] = extra;
743 out_freqs[code] += 1;
744 extra_bitsize += xsize;
745 len += 1;
746 if (n != 2) {
747 @branchHint(.likely);
748 break;
749 }
750            // The code needs to be output once more
751 n = 1;
752 }
753 if (remaining_bits.len == 0) break;
754 }
755
756 return .{ len, extra_bitsize };
757}
758
759test buildClen {
765 var out_values: [288 + 30]u8 = undefined;
766 var out_extra: [288 + 30]u8 = undefined;
767 var out_freqs: [19]u16 = @splat(0);
768 const len, const extra_bitsize = buildClen(&([_]u4{
769 1, // A
770 2, 2, // B
771 3, 3, 3, // C
772 4, 4, 4, 4, // D
773 5, // E
774 5, 5, 5, 5, 5, 5, //
775 5, 5, 5, 5, 5, 5,
776 5, 5,
777 0, 1, // F
778 0, 0, 1, // G
779 } ++ @as([138 + 10]u4, @splat(0)) // H
780 ), &out_values, &out_extra, &out_freqs);
781 try std.testing.expectEqualSlices(u8, &.{
782 1, // A
783 2, 2, // B
784 3, 3, 3, // C
785 4, 16, // D
786 5, 16, 16, 5, 5, // E
787 0, 1, // F
788 0, 0, 1, // G
789 18, 17, // H
790 }, out_values[0..len]);
791 try std.testing.expectEqualSlices(u8, &.{
792 0, // A
793 0, 0, // B
794 0, 0, 0, // C
795 0, (0), // D
796 0, (3), (3), 0, 0, // E
797 0, 0, // F
798 0, 0, 0, // G
799 (127), (7), // H
800 }, out_extra[0..len]);
801 try std.testing.expectEqual(2 + 2 + 2 + 7 + 3, extra_bitsize);
802 try std.testing.expectEqualSlices(u16, &.{
803 3, 3, 2, 3, 1, 3, 0, 0,
804 0, 0, 0, 0, 0, 0, 0, 0,
805 3, 1, 1,
806 }, &out_freqs);
807}
808
809fn writeBlock(c: *Compress, eos: bool) Writer.Error!void {
810 const toks = &c.buffered_tokens;
811 if (!eos) assert(toks.n == block_tokens);
812 assert(toks.lit_freqs[256] == 0);
813 toks.lit_freqs[256] = 1;
814
815 var dyn_codes_buf: [286 + 30]u16 = undefined;
816 var dyn_bits_buf: [286 + 30]u4 = @splat(0);
817
818 const dyn_lit_codes_bitsize, const dyn_last_lit = huffman.build(
819 &toks.lit_freqs,
820 dyn_codes_buf[0..286],
821 dyn_bits_buf[0..286],
822 15,
823 true,
824 );
825 const dyn_lit_len = @max(257, dyn_last_lit + 1);
826
827 const dyn_dist_codes_bitsize, const dyn_last_dist = huffman.build(
828 &toks.dist_freqs,
829 dyn_codes_buf[dyn_lit_len..][0..30],
830 dyn_bits_buf[dyn_lit_len..][0..30],
831 15,
832 true,
833 );
834 const dyn_dist_len = @max(1, dyn_last_dist + 1);
835
836 var clen_values: [288 + 30]u8 = undefined;
837 var clen_extra: [288 + 30]u8 = undefined;
838 var clen_freqs: [19]u16 = @splat(0);
839 const clen_len, const clen_extra_bitsize = buildClen(
840 dyn_bits_buf[0 .. dyn_lit_len + dyn_dist_len],
841 &clen_values,
842 &clen_extra,
843 &clen_freqs,
844 );
845
846 var clen_codes: [19]u16 = undefined;
847 var clen_bits: [19]u4 = @splat(0);
848 const clen_codes_bitsize, _ = huffman.build(
849 &clen_freqs,
850 &clen_codes,
851 &clen_bits,
852 7,
853 false,
854 );
855 const hclen = clenHlen(clen_freqs);
856
857 const dynamic_bitsize = @as(u32, 14) +
858 (4 + @as(u6, hclen)) * 3 + clen_codes_bitsize + clen_extra_bitsize +
859 dyn_lit_codes_bitsize + dyn_dist_codes_bitsize;
860 const fixed_bitsize = n: {
861 const freq7 = 1; // eos
862 var freq8: u16 = 0;
863 var freq9: u16 = 0;
864 var freq12: u16 = 0; // 7 + 5 - match freqs always have corresponding 5-bit dist freq
865 var freq13: u16 = 0; // 8 + 5
866 for (toks.lit_freqs[0..144]) |f| freq8 += f;
867 for (toks.lit_freqs[144..256]) |f| freq9 += f;
868 assert(toks.lit_freqs[256] == 1);
869 for (toks.lit_freqs[257..280]) |f| freq12 += f;
870 for (toks.lit_freqs[280..286]) |f| freq13 += f;
871 break :n @as(u32, freq7) * 7 +
872 @as(u32, freq8) * 8 + @as(u32, freq9) * 9 +
873 @as(u32, freq12) * 12 + @as(u32, freq13) * 13;
874 };
875
876 stored: {
877 for (toks.dist_freqs) |n| if (n != 0) break :stored;
878 // No need to check len frequencies since they each have a corresponding dist frequency
879 assert(for (toks.lit_freqs[257..]) |f| (if (f != 0) break false) else true);
880
881        // No matches. If the stored size is smaller than the Huffman-encoded version, it will be
882        // output in a stored block. This is not done when there are matches, since the original
883        // input would need to be kept (the window may have slid) and may also exceed 65535 bytes.
884        // This should be OK since most inputs with matches compress better anyway.
885 const stored_align_bits = -%(c.bit_writer.buffered_n +% 3);
886 const stored_bitsize = stored_align_bits + @as(u32, 32) + @as(u32, toks.n) * 8;
887 if (@min(dynamic_bitsize, fixed_bitsize) < stored_bitsize) break :stored;
888
889 try c.bit_writer.write(BlockHeader.int(.{ .kind = .stored, .final = eos }), 3);
890 try c.bit_writer.output.rebase(0, 5);
891 c.bit_writer.byteAlign();
892 c.bit_writer.output.writeInt(u16, c.buffered_tokens.n, .little) catch unreachable;
893 c.bit_writer.output.writeInt(u16, ~c.buffered_tokens.n, .little) catch unreachable;
894
895 // Relatively small buffer since regular draining will
896 // always consume slightly less than 2 << 15 bytes.
897 var vec_buf: [4][]const u8 = undefined;
898 var vec_n: usize = 0;
899 var i: usize = 0;
900
901 assert(c.buffered_tokens.pos != 0);
902 while (i != c.buffered_tokens.pos) {
903 const h: TokenBufferEntryHeader = @bitCast(toks.list[i..][0..2].*);
904 assert(h.kind == .bytes);
905
906 i += 2;
907 vec_buf[vec_n] = toks.list[i..][0..h.data];
908 i += h.data;
909
910 vec_n += 1;
911 if (i == c.buffered_tokens.pos or vec_n == vec_buf.len) {
912 try c.bit_writer.output.writeVecAll(vec_buf[0..vec_n]);
913 vec_n = 0;
914 }
915 }
916
917 toks.* = .empty;
918 return;
919 }
920
921 const lit_codes, const lit_bits, const dist_codes, const dist_bits =
922 if (dynamic_bitsize < fixed_bitsize) codes: {
923 try c.bit_writer.write(BlockHeader.Dynamic.int(.{
924 .regular = .{ .final = eos, .kind = .dynamic },
925 .hlit = @intCast(dyn_lit_len - 257),
926 .hdist = @intCast(dyn_dist_len - 1),
927 .hclen = hclen,
928 }), 17);
929 try c.bit_writer.writeClen(
930 hclen,
931 clen_values[0..clen_len],
932 clen_extra[0..clen_len],
933 clen_codes,
934 clen_bits,
935 );
936 break :codes .{
937 dyn_codes_buf[0..dyn_lit_len],
938 dyn_bits_buf[0..dyn_lit_len],
939 dyn_codes_buf[dyn_lit_len..][0..dyn_dist_len],
940 dyn_bits_buf[dyn_lit_len..][0..dyn_dist_len],
941 };
942 } else codes: {
943 try c.bit_writer.write(BlockHeader.int(.{ .final = eos, .kind = .fixed }), 3);
944 break :codes .{
945 &token.fixed_lit_codes,
946 &token.fixed_lit_bits,
947 &token.fixed_dist_codes,
948 &token.fixed_dist_bits,
949 };
950 };
951
952 var i: usize = 0;
953 while (i != toks.pos) {
954 const h: TokenBufferEntryHeader = @bitCast(toks.list[i..][0..2].*);
955 i += 2;
956 if (h.kind == .bytes) {
957 for (toks.list[i..][0..h.data]) |b| {
958 try c.bit_writer.write(lit_codes[b], lit_bits[b]);
959 }
960 i += h.data;
961 } else {
962 const dist = h.data;
963 const len = toks.list[i];
964 i += 1;
965 const dist_code = token.DistCode.fromVal(dist);
966 const len_code = token.LenCode.fromVal(len);
967 const dist_val = dist_code.toInt();
968 const lit_val = @as(u16, 257) + len_code.toInt();
969
970 var out: u48 = lit_codes[lit_val];
971 var out_bits: u6 = lit_bits[lit_val];
972 out |= @shlExact(@as(u20, len - len_code.base()), @intCast(out_bits));
973 out_bits += len_code.extraBits();
974
975 out |= @shlExact(@as(u35, dist_codes[dist_val]), out_bits);
976 out_bits += dist_bits[dist_val];
977 out |= @shlExact(@as(u48, dist - dist_code.base()), out_bits);
978 out_bits += dist_code.extraBits();
979
980 try c.bit_writer.write(out, out_bits);
981 }
982 }
983 try c.bit_writer.write(lit_codes[256], lit_bits[256]);
984
985 toks.* = .empty;
986}
987
988/// Huffman tree construction.
989///
990/// The approach for building the huffman tree is [taken from zlib]
991/// (https://github.com/madler/zlib/blob/v1.3.1/trees.c#L625) with some modifications.
992const huffman = struct {
993 const max_leafs = 286;
994 const max_nodes = max_leafs * 2;
995
996 const Node = packed struct(u32) {
997 depth: u16,
998 freq: u16,
999
1000 pub const Index = u16;
1001
1002 /// `freq` is more significant than `depth`
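        /// because it occupies the high bits of the packed `u32`; equal frequencies
        /// fall back to comparing `depth`.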
1003 pub fn smaller(a: Node, b: Node) bool {
1004 return @as(u32, @bitCast(a)) < @as(u32, @bitCast(b));
1005 }
1006 };
1007
1008 fn heapSiftDown(nodes: []Node, heap: []Node.Index, start: usize) void {
1009 var i = start;
1010 while (true) {
1011 var min = i;
1012 const l = i * 2 + 1;
1013 const r = l + 1;
1014 min = if (l < heap.len and nodes[heap[l]].smaller(nodes[heap[min]])) l else min;
1015 min = if (r < heap.len and nodes[heap[r]].smaller(nodes[heap[min]])) r else min;
1016 if (i == min) break;
1017 mem.swap(Node.Index, &heap[i], &heap[min]);
1018 i = min;
1019 }
1020 }
1021
1022 fn heapRemoveRoot(nodes: []Node, heap: []Node.Index) void {
1023 heap[0] = heap[heap.len - 1];
1024 heapSiftDown(nodes, heap[0 .. heap.len - 1], 0);
1025 }
1026
1027 /// Returns the total bits to encode `freqs` followed by the index of the last non-zero bits.
1028 /// For `freqs[i]` == 0, `out_codes[i]` will be undefined.
1029 /// It is asserted `out_bits` is zero-filled.
1030    /// It is asserted `out_bits.len` is at least
1031    /// one if incomplete trees are allowed and two otherwise.
1032 pub fn build(
1033 freqs: []const u16,
1034 out_codes: []u16,
1035 out_bits: []u4,
1036 max_bits: u4,
1037 incomplete_allowed: bool,
1038 ) struct { u32, u16 } {
1039 assert(out_codes.len - 1 >= @intFromBool(incomplete_allowed));
1040 // freqs and out_codes are in the loop to assert they are all the same length
1041 for (freqs, out_codes, out_bits) |_, _, n| assert(n == 0);
1042 assert(out_codes.len <= @as(u16, 1) << max_bits);
1043
1044 // Indexes 0..freqs are leafs, indexes max_leafs.. are internal nodes.
1045 var tree_nodes: [max_nodes]Node = undefined;
1046 var tree_parent_nodes: [max_nodes]Node.Index = undefined;
1047 var nodes_end: u16 = max_leafs;
1048 // Dual-purpose buffer. Nodes are ordered by least frequency or when equal, least depth.
1049 // The start is a min heap of level-zero nodes.
1050 // The end is a sorted buffer of nodes with the greatest first.
1051 var node_buf: [max_nodes]Node.Index = undefined;
1052 var heap_end: u16 = 0;
1053 var sorted_start: u16 = node_buf.len;
1054
1055 for (0.., freqs) |n, freq| {
1056 tree_nodes[n] = .{ .freq = freq, .depth = 0 };
1057 node_buf[heap_end] = @intCast(n);
1058 heap_end += @intFromBool(freq != 0);
1059 }
1060
1061 // There must be at least one code at minimum,
1062 node_buf[heap_end] = 0;
1063 heap_end += @intFromBool(heap_end == 0);
1064 // and at least two if incomplete must be avoided.
1065 if (heap_end == 1 and incomplete_allowed) {
1066 @branchHint(.unlikely); // LLVM 21 optimizes this branch as the more likely without
1067
1068            // Codes must have at least one bit, so this is a special case.
1069 out_bits[node_buf[0]] = 1;
1070 out_codes[node_buf[0]] = 0;
1071 return .{ freqs[node_buf[0]], node_buf[0] };
1072 }
1073        const last_nonzero = @max(node_buf[heap_end - 1], 1); // For heap_end > 1, the last is not 0
1074 node_buf[heap_end] = @intFromBool(node_buf[0] == 0);
1075 heap_end += @intFromBool(heap_end == 1);
1076
1077 // Heapify the array of frequencies
1078 const heapify_final = heap_end - 1;
1079 const heapify_start = (heapify_final - 1) / 2; // Parent of final node
1080 var heapify_i = heapify_start;
1081 while (true) {
1082 heapSiftDown(&tree_nodes, node_buf[0..heap_end], heapify_i);
1083 if (heapify_i == 0) break;
1084 heapify_i -= 1;
1085 }
1086
1087 // Build optimal tree. `max_bits` is not enforced yet.
1088 while (heap_end > 1) {
1089 const a = node_buf[0];
1090 heapRemoveRoot(&tree_nodes, node_buf[0..heap_end]);
1091 heap_end -= 1;
1092 const b = node_buf[0];
1093
1094 sorted_start -= 2;
1095 node_buf[sorted_start..][0..2].* = .{ b, a };
1096
1097 tree_nodes[nodes_end] = .{
1098 .freq = tree_nodes[a].freq + tree_nodes[b].freq,
1099 .depth = @max(tree_nodes[a].depth, tree_nodes[b].depth) + 1,
1100 };
1101 defer nodes_end += 1;
1102 tree_parent_nodes[a] = nodes_end;
1103 tree_parent_nodes[b] = nodes_end;
1104
1105 node_buf[0] = nodes_end;
1106 heapSiftDown(&tree_nodes, node_buf[0..heap_end], 0);
1107 }
1108 sorted_start -= 1;
1109 node_buf[sorted_start] = node_buf[0];
1110
1111 var bit_counts: [16]u16 = @splat(0);
1112 buildBits(out_bits, &bit_counts, &tree_parent_nodes, node_buf[sorted_start..], max_bits);
1113 return .{ buildValues(freqs, out_codes, out_bits, bit_counts), last_nonzero };
1114 }
1115
1116 fn buildBits(
1117 out_bits: []u4,
1118 bit_counts: *[16]u16,
1119 parent_nodes: *[max_nodes]Node.Index,
1120 sorted: []Node.Index,
1121 max_bits: u4,
1122 ) void {
1123 var internal_node_bits: [max_nodes - max_leafs]u4 = undefined;
1124 var overflowed: u16 = 0;
1125
1126 internal_node_bits[sorted[0] - max_leafs] = 0; // root
1127 for (sorted[1..]) |i| {
1128 const parent_bits = internal_node_bits[parent_nodes[i] - max_leafs];
1129 overflowed += @intFromBool(parent_bits == max_bits);
1130 const bits = parent_bits + @intFromBool(parent_bits != max_bits);
1131 bit_counts[bits] += @intFromBool(i < max_leafs);
1132 (if (i >= max_leafs) &internal_node_bits[i - max_leafs] else &out_bits[i]).* = bits;
1133 }
1134
1135 if (overflowed == 0) {
1136 @branchHint(.likely);
1137 return;
1138 }
1139
1140 outer: while (true) {
1141 var deepest: u4 = max_bits - 1;
1142 while (bit_counts[deepest] == 0) deepest -= 1;
1143 while (overflowed != 0) {
1144 // Insert an internal node under the leaf and move an overflow as its sibling
1145 bit_counts[deepest] -= 1;
1146 bit_counts[deepest + 1] += 2;
1147                // Only the overflow is moved. Its sibling's depth is one less, however it is still >= depth.
1148 bit_counts[max_bits] -= 1;
1149 overflowed -= 2;
1150
1151 if (overflowed == 0) break :outer;
1152 deepest += 1;
1153 if (deepest == max_bits) continue :outer;
1154 }
1155 }
1156
1157 // Reassign bit lengths
1158 assert(bit_counts[0] == 0);
1159 var i: usize = 0;
1160 for (1.., bit_counts[1..]) |bits, all| {
1161 var remaining = all;
1162 while (remaining != 0) {
1163 defer i += 1;
1164 if (sorted[i] >= max_leafs) continue;
1165 out_bits[sorted[i]] = @intCast(bits);
1166 remaining -= 1;
1167 }
1168 }
1169 assert(for (sorted[i..]) |n| { // all leafs consumed
1170 if (n < max_leafs) break false;
1171 } else true);
1172 }
1173
1174 fn buildValues(freqs: []const u16, out_codes: []u16, bits: []u4, bit_counts: [16]u16) u32 {
1175 var code: u16 = 0;
1176 var base: [16]u16 = undefined;
1177 assert(bit_counts[0] == 0);
1178 for (bit_counts[1..], base[1..]) |c, *b| {
1179 b.* = code;
1180 code +%= c;
1181 code <<= 1;
1182 }
1183 var freq_sums: [16]u16 = @splat(0);
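        // DEFLATE emits Huffman codes LSB-first, so each canonical `b`-bit code is
        // bit-reversed into the low bits: `@bitReverse` on the full `u16` followed by
        // `>> -%b` (a right shift by `16 - b` for nonzero `b`).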
1184 for (out_codes, bits, freqs) |*c, b, f| {
1185 c.* = @bitReverse(base[b]) >> -%b;
1186            base[b] += 1; // For `b == 0` this is fine since the code is specified to be undefined.
1187 freq_sums[b] += f;
1188 }
1189 return @reduce(.Add, @as(@Vector(16, u32), freq_sums) * std.simd.iota(u32, 16));
1190 }
1191
1192 test build {
1193 var codes: [8]u16 = undefined;
1194 var bits: [8]u4 = undefined;
1195
1196 const regular_freqs: [8]u16 = .{ 1, 1, 0, 8, 8, 0, 2, 4 };
1197 // The optimal tree for the above frequencies is
1198 // 4 1 1
1199 // \ /
1200 // 3 2 #
1201 // \ /
1202 // 2 8 8 4 #
1203 // \ / \ /
1204 // 1 # #
1205 // \ /
1206 // 0 #
1207 bits = @splat(0);
1208 var n, var lnz = build(®ular_freqs, &codes, &bits, 15, true);
1209 codes[2] = 0;
1210 codes[5] = 0;
1211 try std.testing.expectEqualSlices(u4, &.{ 4, 4, 0, 2, 2, 0, 3, 2 }, &bits);
1212 try std.testing.expectEqualSlices(u16, &.{
1213 0b0111, 0b1111, 0, 0b00, 0b10, 0, 0b011, 0b01,
1214 }, &codes);
1215 try std.testing.expectEqual(54, n);
1216 try std.testing.expectEqual(7, lnz);
1217 // When constrained to 3 bits, it becomes
1218 // 3 1 1 2 4
1219 // \ / \ /
1220 // 2 8 8 # #
1221 // \ / \ /
1222 // 1 # #
1223 // \ /
1224 // 0 #
1225 bits = @splat(0);
1226 n, lnz = build(®ular_freqs, &codes, &bits, 3, true);
1227 codes[2] = 0;
1228 codes[5] = 0;
1229 try std.testing.expectEqualSlices(u4, &.{ 3, 3, 0, 2, 2, 0, 3, 3 }, &bits);
1230 try std.testing.expectEqualSlices(u16, &.{
1231 0b001, 0b101, 0, 0b00, 0b10, 0, 0b011, 0b111,
1232 }, &codes);
1233 try std.testing.expectEqual(56, n);
1234 try std.testing.expectEqual(7, lnz);
1235
1236 // Empty tree. At least one code should be present
1237 bits = @splat(0);
1238 n, lnz = build(&.{ 0, 0 }, codes[0..2], bits[0..2], 15, true);
1239 try std.testing.expectEqualSlices(u4, &.{ 1, 0 }, bits[0..2]);
1240 try std.testing.expectEqual(0b0, codes[0]);
1241 try std.testing.expectEqual(0, n);
1242 try std.testing.expectEqual(0, lnz);
1243
1244 // Check all incompletable frequencies are completed
1245 for ([_][2]u16{ .{ 0, 0 }, .{ 0, 1 }, .{ 1, 0 } }) |incomplete| {
1246 // Empty tree. Both codes should be present to prevent incomplete trees
1247 bits = @splat(0);
1248 n, lnz = build(&incomplete, codes[0..2], bits[0..2], 15, false);
1249 try std.testing.expectEqualSlices(u4, &.{ 1, 1 }, bits[0..2]);
1250 try std.testing.expectEqualSlices(u16, &.{ 0b0, 0b1 }, codes[0..2]);
1251 try std.testing.expectEqual(incomplete[0] + incomplete[1], n);
1252 try std.testing.expectEqual(1, lnz);
1253 }
1254
1255 try std.testing.fuzz({}, checkFuzzedBuildFreqs, .{});
1256 }
1257
1258 fn checkFuzzedBuildFreqs(_: void, freqs: []const u8) !void {
1259 @disableInstrumentation();
1260 var r: Io.Reader = .fixed(freqs);
1261 var freqs_limit: u16 = 65535;
1262 var freqs_buf: [max_leafs]u16 = undefined;
1263 var nfreqs: u15 = 0;
1264
1265 const params: packed struct(u8) {
1266 max_bits: u4,
1267 _: u3,
1268 incomplete_allowed: bool,
1269 } = @bitCast(r.takeByte() catch 255);
1270 while (nfreqs != freqs_buf.len) {
1271 const leb = r.takeLeb128(u16);
1272 const f = if (leb) |f| @min(f, freqs_limit) else |e| switch (e) {
1273 error.ReadFailed => unreachable,
1274 error.EndOfStream => 0,
1275 error.Overflow => freqs_limit,
1276 };
1277 freqs_buf[nfreqs] = f;
1278 nfreqs += 1;
1279 freqs_limit -= f;
1280 if (leb == error.EndOfStream and nfreqs - 1 > @intFromBool(params.incomplete_allowed))
1281 break;
1282 }
1283
1284 var codes_buf: [max_leafs]u16 = undefined;
1285 var bits_buf: [max_leafs]u4 = @splat(0);
1286 const total_bits, const last_nonzero = build(
1287 freqs_buf[0..nfreqs],
1288 codes_buf[0..nfreqs],
1289 bits_buf[0..nfreqs],
1290 @max(math.log2_int_ceil(u15, nfreqs), params.max_bits),
1291 params.incomplete_allowed,
1292 );
1293
1294 var has_bitlen_one: bool = false;
1295 var expected_total_bits: u32 = 0;
1296 var expected_last_nonzero: ?u16 = null;
1297 var weighted_sum: u32 = 0;
1298 for (freqs_buf[0..nfreqs], bits_buf[0..nfreqs], 0..) |f, nb, i| {
1299 has_bitlen_one = has_bitlen_one or nb == 1;
1300 weighted_sum += @shlExact(@as(u16, 1), 15 - nb) & ((1 << 15) - 1);
1301 expected_total_bits += @as(u32, f) * nb;
1302 if (nb != 0) expected_last_nonzero = @intCast(i);
1303 }
1304
1305 errdefer std.log.err(
1306 \\ params: {}
1307 \\ freqs: {any}
1308 \\ bits: {any}
1309 \\ # freqs: {}
1310 \\ max bits: {}
1311 \\ weighted sum: {}
1312 \\ has_bitlen_one: {}
1313 \\ expected/actual total bits: {}/{}
1314 \\ expected/actual last nonzero: {?}/{}
1315 ++ "\n", .{
1316 params,
1317 freqs_buf[0..nfreqs],
1318 bits_buf[0..nfreqs],
1319 nfreqs,
1320 @max(math.log2_int_ceil(u15, nfreqs), params.max_bits),
1321 weighted_sum,
1322 has_bitlen_one,
1323 expected_total_bits,
1324 total_bits,
1325 expected_last_nonzero,
1326 last_nonzero,
1327 });
1328
1329 try std.testing.expectEqual(expected_total_bits, total_bits);
1330 try std.testing.expectEqual(expected_last_nonzero, last_nonzero);
1331 if (weighted_sum > 1 << 15)
1332 return error.OversubscribedHuffmanTree;
1333 if (weighted_sum < 1 << 15 and
1334 !(params.incomplete_allowed and has_bitlen_one and weighted_sum == 1 << 14))
1335 return error.IncompleteHuffmanTree;
1336 }
1337};
1338
1339test {
1340 _ = huffman;
1341}
1342
1343/// [0] is a gradient where the probability of lower values decreases across it
1344/// [1] is completely random and hence incompressible
1345fn testingFreqBufs() !*[2][65536]u8 {
1346 const fbufs = try std.testing.allocator.create([2][65536]u8);
1347 var prng: std.Random.DefaultPrng = .init(std.testing.random_seed);
1348 prng.random().bytes(&fbufs[0]);
1349 prng.random().bytes(&fbufs[1]);
1350 for (0.., &fbufs[0], fbufs[1]) |i, *grad, rand| {
1351 const prob = @as(u8, @intCast(255 - i / (fbufs[0].len * 256)));
1352 grad.* /= @max(1, rand / @max(1, prob));
1353 }
1354 return fbufs;
1355}
1356
1357fn testingCheckDecompressedMatches(
1358 flate_bytes: []const u8,
1359 expected_size: u32,
1360 expected_hash: flate.Container.Hasher,
1361) !void {
1362 const container: flate.Container = expected_hash;
1363 var data_hash: flate.Container.Hasher = .init(container);
1364 var data_size: u32 = 0;
1365 var flate_r: Io.Reader = .fixed(flate_bytes);
1366 var deflate_buf: [flate.max_window_len]u8 = undefined;
1367 var deflate: flate.Decompress = .init(&flate_r, container, &deflate_buf);
1368
1369 while (deflate.reader.peekGreedy(1)) |bytes| {
1370 data_size += @intCast(bytes.len);
1371 data_hash.update(bytes);
1372 deflate.reader.toss(bytes.len);
1373 } else |e| switch (e) {
1374 error.ReadFailed => return deflate.err.?,
1375 error.EndOfStream => {},
1376 }
1377
1378 try testingCheckContainerHash(
1379 expected_size,
1380 expected_hash,
1381 data_hash,
1382 data_size,
1383 deflate.container_metadata,
1384 );
1385}
1386
1387fn testingCheckContainerHash(
1388 expected_size: u32,
1389 expected_hash: flate.Container.Hasher,
1390 actual_hash: flate.Container.Hasher,
1391 actual_size: u32,
1392 actual_meta: flate.Container.Metadata,
1393) !void {
1394 try std.testing.expectEqual(expected_size, actual_size);
1395 switch (actual_hash) {
1396 .raw => {},
1397 .gzip => |gz| {
1398 const expected_crc = expected_hash.gzip.crc.final();
1399 try std.testing.expectEqual(expected_size, actual_meta.gzip.count);
1400 try std.testing.expectEqual(expected_crc, gz.crc.final());
1401 try std.testing.expectEqual(expected_crc, actual_meta.gzip.crc);
1402 },
1403 .zlib => |zl| {
1404 const expected_adler = expected_hash.zlib.adler;
1405 try std.testing.expectEqual(expected_adler, zl.adler);
1406 try std.testing.expectEqual(expected_adler, actual_meta.zlib.adler);
1407 },
1408 }
1409}
1410
1411const PackedContainer = packed struct(u2) {
1412 raw: bool,
1413 other: enum(u1) { gzip, zlib },
1414
1415 pub fn val(c: @This()) flate.Container {
1416 return if (c.raw) .raw else switch (c.other) {
1417 .gzip => .gzip,
1418 .zlib => .zlib,
1419 };
1420 }
1421};
1422
1423test Compress {
1424 const fbufs = try testingFreqBufs();
1425 defer if (!builtin.fuzz) std.testing.allocator.destroy(fbufs);
1426 try std.testing.fuzz(fbufs, testFuzzedCompressInput, .{});
1427}
1428
1429fn testFuzzedCompressInput(fbufs: *const [2][65536]u8, input: []const u8) !void {
1430 var in: Io.Reader = .fixed(input);
1431 var opts: packed struct(u51) {
1432 container: PackedContainer,
1433 buf_size: u16,
1434 good: u8,
1435 nice: u8,
1436 lazy: u8,
1437 /// Not a `u16` to limit it for performance
1438 chain: u9,
1439 } = @bitCast(in.takeLeb128(u51) catch 0);
1440 var expected_hash: flate.Container.Hasher = .init(opts.container.val());
1441 var expected_size: u32 = 0;
1442
1443 var flate_buf: [128 * 1024]u8 = undefined;
1444 var flate_w: Writer = .fixed(&flate_buf);
1445 var deflate_buf: [flate.max_window_len * 2]u8 = undefined;
1446 var deflate_w = try Compress.init(
1447 &flate_w,
1448 deflate_buf[0 .. flate.max_window_len + @as(usize, opts.buf_size)],
1449 opts.container.val(),
1450 .{
1451 .good = @as(u16, opts.good) + 3,
1452 .nice = @as(u16, opts.nice) + 3,
1453 .lazy = @as(u16, @min(opts.lazy, opts.nice)) + 3,
1454 .chain = @max(1, opts.chain, @as(u8, 4) * @intFromBool(opts.good <= opts.lazy)),
1455 },
1456 );
1457
1458    // It is ensured that no more bytes than this are written, both so this run does not take
1459    // too long and so that `flate_buf` does not run out of space.
1460 const flate_buf_blocks = flate_buf.len / block_tokens;
1461    // Allow a max overhead of 64 bytes per block since the implementation does not guarantee it
1462    // writes stored blocks when that would be optimal. This comes from taking less than 32 bytes
1463    // to write an optimal dynamic block header of mostly bitlen-8 codes and the end-of-block
1464    // literal, plus `(65536 / 256) / 8`, which is the maximum number of extra bytes from
1465    // bitlen-9 codes. An extra 32 bytes is reserved on top of that for container headers and footers.
1466 const max_size = flate_buf.len - (flate_buf_blocks * 64 + 32);
1467
1468 while (true) {
1469 const data: packed struct(u36) {
1470 is_rebase: bool,
1471 is_bytes: bool,
1472 params: packed union {
1473 copy: packed struct(u34) {
1474 len_lo: u5,
1475 dist: u15,
1476 len_hi: u4,
1477 _: u10,
1478 },
1479 bytes: packed struct(u34) {
1480 kind: enum(u1) { gradient, random },
1481 off_hi: u4,
1482 len_lo: u10,
1483 off_mi: u4,
1484 len_hi: u5,
1485 off_lo: u8,
1486 _: u2,
1487 },
1488 rebase: packed struct(u34) {
1489 preserve: u17,
1490 capacity: u17,
1491 },
1492 },
1493 } = @bitCast(in.takeLeb128(u36) catch |e| switch (e) {
1494 error.ReadFailed => unreachable,
1495 error.Overflow => 0,
1496 error.EndOfStream => break,
1497 });
1498
1499 const buffered = deflate_w.writer.buffered();
1500 // Required for repeating patterns and since writing from `buffered` is illegal
1501 var copy_buf: [512]u8 = undefined;
1502
1503 if (data.is_rebase) {
1504 const usable_capacity = deflate_w.writer.buffer.len - rebase_reserved_capacity;
1505 const preserve = @min(data.params.rebase.preserve, usable_capacity);
1506 const capacity = @min(data.params.rebase.capacity, usable_capacity -
1507 @max(rebase_min_preserve, preserve));
1508 try deflate_w.writer.rebase(preserve, capacity);
1509 continue;
1510 }
1511
1512 const max_bytes = max_size -| expected_size;
1513 const bytes = if (!data.is_bytes and buffered.len != 0) bytes: {
1514 const dist = @min(buffered.len, @as(u32, data.params.copy.dist) + 1);
1515 const len = @min(
1516 @max(@shlExact(@as(u9, data.params.copy.len_hi), 5) | data.params.copy.len_lo, 1),
1517 max_bytes,
1518 );
1519            // Reuse the implementation's history. Otherwise our own would need to be maintained.
1520 const bytes_start = buffered[buffered.len - dist ..];
1521 const history_bytes = bytes_start[0..@min(bytes_start.len, len)];
1522
1523 @memcpy(copy_buf[0..history_bytes.len], history_bytes);
1524 const new_history = len - history_bytes.len;
1525 if (history_bytes.len != len) for ( // check needed for `- dist`
1526 copy_buf[history_bytes.len..][0..new_history],
1527 copy_buf[history_bytes.len - dist ..][0..new_history],
1528 ) |*next, prev| {
1529 next.* = prev;
1530 };
1531 break :bytes copy_buf[0..len];
1532 } else bytes: {
1533 const off = @shlExact(@as(u16, data.params.bytes.off_hi), 12) |
1534 @shlExact(@as(u16, data.params.bytes.off_mi), 8) |
1535 data.params.bytes.off_lo;
1536 const len = @shlExact(@as(u16, data.params.bytes.len_hi), 10) |
1537 data.params.bytes.len_lo;
1538 const fbuf = &fbufs[@intFromEnum(data.params.bytes.kind)];
1539 break :bytes fbuf[off..][0..@min(len, fbuf.len - off, max_bytes)];
1540 };
1541 assert(bytes.len <= max_bytes);
1542 try deflate_w.writer.writeAll(bytes);
1543 expected_hash.update(bytes);
1544 expected_size += @intCast(bytes.len);
1545 }
1546
1547 try deflate_w.writer.flush();
1548 try testingCheckDecompressedMatches(flate_w.buffered(), expected_size, expected_hash);
1549}
1550
1551/// Does not compress data; everything is emitted in stored blocks.
1552pub const Raw = struct {
1553    /// After `flush` is called, all vtable calls will result in `error.WriteFailed`.
1554 writer: Writer,
1555 output: *Writer,
1556 hasher: flate.Container.Hasher,
1557
1558 const max_block_size: u16 = 65535;
1559 const full_header: [5]u8 = .{
1560 BlockHeader.int(.{ .final = false, .kind = .stored }),
1561 255,
1562 255,
1563 0,
1564 0,
1565 };
1566
1567    /// While there is no minimum buffer size, it is recommended that the buffer
1568    /// be at least `flate.max_window_len` bytes for optimal output.
1569 pub fn init(output: *Writer, buffer: []u8, container: flate.Container) Writer.Error!Raw {
1570 try output.writeAll(container.header());
1571 return .{
1572 .writer = .{
1573 .buffer = buffer,
1574 .vtable = &.{
1575 .drain = Raw.drain,
1576 .flush = Raw.flush,
1577 .rebase = Raw.rebase,
1578 },
1579 },
1580 .output = output,
1581 .hasher = .init(container),
1582 };
1583 }
1584
1585 fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
1586 errdefer w.* = .failing;
1587 const r: *Raw = @fieldParentPtr("writer", w);
1588 const min_block = @min(w.buffer.len, max_block_size);
1589 const pattern = data[data.len - 1];
1590 var partial_header: [5]u8 = undefined;
1591
1592 var vecs: [16][]const u8 = undefined;
1593 var vecs_n: usize = 0;
1594 const data_bytes = Writer.countSplat(data, splat);
1595 const total_bytes = w.end + data_bytes;
1596 var rem_bytes = total_bytes;
1597 var rem_splat = splat;
1598 var rem_data = data;
1599 var rem_data_elem: []const u8 = w.buffered();
1600
1601 assert(rem_bytes > min_block);
1602 while (rem_bytes > min_block) { // not >= to allow `min_block` blocks to be marked as final
1603 // also, it handles the case of `min_block` being zero (no buffer)
1604 const block_size: u16 = @min(rem_bytes, max_block_size);
1605 rem_bytes -= block_size;
1606
1607 if (vecs_n == vecs.len) {
1608 try r.output.writeVecAll(&vecs);
1609 vecs_n = 0;
1610 }
1611 vecs[vecs_n] = if (block_size == 65535)
1612 &full_header
1613 else header: {
1614 partial_header[0] = BlockHeader.int(.{ .final = false, .kind = .stored });
1615 mem.writeInt(u16, partial_header[1..3], block_size, .little);
1616 mem.writeInt(u16, partial_header[3..5], ~block_size, .little);
1617 break :header &partial_header;
1618 };
1619 vecs_n += 1;
1620
1621 var block_limit: Io.Limit = .limited(block_size);
1622 while (true) {
1623 if (vecs_n == vecs.len) {
1624 try r.output.writeVecAll(&vecs);
1625 vecs_n = 0;
1626 }
1627
1628 const vec = block_limit.sliceConst(rem_data_elem);
1629 vecs[vecs_n] = vec;
1630 vecs_n += 1;
1631 r.hasher.update(vec);
1632
1633 const is_pattern = rem_splat != splat and vec.len == pattern.len;
1634 if (is_pattern) assert(pattern.len != 0); // exceeded countSplat
1635
1636 if (!is_pattern or rem_splat == 0 or pattern.len > @intFromEnum(block_limit) / 2) {
1637 rem_data_elem = rem_data_elem[vec.len..];
1638 block_limit = block_limit.subtract(vec.len).?;
1639
1640 if (rem_data_elem.len == 0) {
1641 rem_data_elem = rem_data[0];
1642 if (rem_data.len != 1) {
1643 rem_data = rem_data[1..];
1644 } else if (rem_splat != 0) {
1645 rem_splat -= 1;
1646 } else {
1647 // All of `data` has been consumed.
1648 assert(block_limit == .nothing);
1649 assert(rem_bytes == 0);
1650 // Since `rem_bytes` and `block_limit` are zero, these won't be used.
1651 rem_data = undefined;
1652 rem_data_elem = undefined;
1653 rem_splat = undefined;
1654 }
1655 }
1656 if (block_limit == .nothing) break;
1657 } else {
1658 const out_splat = @intFromEnum(block_limit) / pattern.len;
1659 assert(out_splat >= 2);
1660
1661 try r.output.writeSplatAll(vecs[0..vecs_n], out_splat);
1662 for (1..out_splat) |_| r.hasher.update(vec);
1663
1664 vecs_n = 0;
1665 block_limit = block_limit.subtract(pattern.len * out_splat).?;
1666 if (rem_splat >= out_splat) {
                        // `out_splat` already accounts for `rem_data`, however one more needs
                        // to be subtracted anyway since the next pattern is also being taken.
                        rem_splat -= out_splat;
                    } else {
                        // All of `data` has been consumed.
                        assert(block_limit == .nothing);
                        assert(rem_bytes == 0);
                        // Since `rem_bytes` and `block_limit` are zero, these won't be used.
                        rem_data = undefined;
                        rem_data_elem = undefined;
                        rem_splat = undefined;
                    }
                    if (block_limit == .nothing) break;
                }
            }
        }

        if (vecs_n != 0) { // vecs_n can be zero if a splat was just sent
            try r.output.writeVecAll(vecs[0..vecs_n]);
        }

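        // Whatever was not consumed must be the tail of `w.buffer` itself so that
        // `consume` below keeps it buffered for the next drain.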
        if (rem_bytes > data_bytes) {
            assert(rem_bytes - data_bytes == rem_data_elem.len);
            assert(&rem_data_elem[0] == &w.buffer[total_bytes - rem_bytes]);
        }
        return w.consume(total_bytes - rem_bytes);
    }

    fn flush(w: *Writer) Writer.Error!void {
        defer w.* = .failing;
        try Raw.rebaseInner(w, 0, w.buffer.len, true);
    }

    fn rebase(w: *Writer, preserve: usize, capacity: usize) Writer.Error!void {
        errdefer w.* = .failing;
        try Raw.rebaseInner(w, preserve, capacity, false);
    }

    fn rebaseInner(w: *Writer, preserve: usize, capacity: usize, eos: bool) Writer.Error!void {
        const r: *Raw = @fieldParentPtr("writer", w);
        assert(preserve + capacity <= w.buffer.len);
        if (eos) assert(capacity == w.buffer.len);

        var partial_header: [5]u8 = undefined;
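        // 8 bytes covers the largest container footer (gzip: 4-byte CRC-32 + 4-byte size).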
        var footer_buf: [8]u8 = undefined;
        const preserved = @min(w.end, preserve);
        var remaining = w.buffer[0 .. w.end - preserved];

        var vecs: [16][]const u8 = undefined;
        var vecs_n: usize = 0;
        while (remaining.len > max_block_size) { // not >= so there is always a block down below
            if (vecs_n == vecs.len) {
                try r.output.writeVecAll(&vecs);
                vecs_n = 0;
            }
            vecs[vecs_n + 0] = &full_header;
            vecs[vecs_n + 1] = remaining[0..max_block_size];
            r.hasher.update(vecs[vecs_n + 1]);
            vecs_n += 2;
            remaining = remaining[max_block_size..];
        }

        // eos check required for empty block
        if (w.buffer.len - (remaining.len + preserved) < capacity or eos) {
            // A partial write is necessary to reclaim enough buffer space
            const block_size: u16 = @intCast(remaining.len);
            partial_header[0] = BlockHeader.int(.{ .final = eos, .kind = .stored });
            mem.writeInt(u16, partial_header[1..3], block_size, .little);
            mem.writeInt(u16, partial_header[3..5], ~block_size, .little);

            if (vecs_n == vecs.len) {
                try r.output.writeVecAll(&vecs);
                vecs_n = 0;
            }
            vecs[vecs_n + 0] = &partial_header;
            vecs[vecs_n + 1] = remaining[0..block_size];
            r.hasher.update(vecs[vecs_n + 1]);
            vecs_n += 2;
            remaining = remaining[block_size..];
            assert(remaining.len == 0);

            if (eos and r.hasher != .raw) {
                // the footer is done here instead of `flush` so it can be included in the vector
                var footer_w: Writer = .fixed(&footer_buf);
                r.hasher.writeFooter(&footer_w) catch unreachable;
                assert(footer_w.end != 0);

                if (vecs_n == vecs.len) {
                    try r.output.writeVecAll(&vecs);
                    return r.output.writeAll(footer_w.buffered());
                } else {
                    vecs[vecs_n] = footer_w.buffered();
                    vecs_n += 1;
                }
            }
        }

        try r.output.writeVecAll(vecs[0..vecs_n]);
        _ = w.consume(w.end - preserved - remaining.len);
    }
};

test Raw {
    const data_buf = try std.testing.allocator.create([4 * 65536]u8);
    defer if (!builtin.fuzz) std.testing.allocator.destroy(data_buf);
    var prng: std.Random.DefaultPrng = .init(std.testing.random_seed);
    prng.random().bytes(data_buf);
    try std.testing.fuzz(data_buf, testFuzzedRawInput, .{});
}

fn countVec(data: []const []const u8) usize {
    var bytes: usize = 0;
    for (data) |d| bytes += d.len;
    return bytes;
}

fn testFuzzedRawInput(data_buf: *const [4 * 65536]u8, input: []const u8) !void {
    const HashedStoreWriter = struct {
        writer: Writer,
        state: enum {
            header,
            block_header,
            block_body,
            final_block_body,
            footer,
            end,
        },
        block_remaining: u16,
        container: flate.Container,
        data_hash: flate.Container.Hasher,
        data_size: usize,
        footer_hash: u32,
        footer_size: u32,

        pub fn init(buf: []u8, container: flate.Container) @This() {
            return .{
                .writer = .{
                    .vtable = &.{
                        .drain = @This().drain,
                        .flush = @This().flush,
                    },
                    .buffer = buf,
                },
                .state = .header,
                .block_remaining = 0,
                .container = container,
                .data_hash = .init(container),
                .data_size = 0,
                .footer_hash = undefined,
                .footer_size = undefined,
            };
        }

        /// Note that this implementation is somewhat dependent on the implementation of
        /// `Raw`: it expects headers and footers to be contiguous within a single data
        /// element, the header to be the same as `flate.Container.header`, and that
        /// multiple streams are not concatenated.
        fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
            errdefer w.* = .failing;
            var h: *@This() = @fieldParentPtr("writer", w);

            var rem_splat = splat;
            var rem_data = data;
            var rem_data_elem: []const u8 = w.buffered();

            data_loop: while (true) {
                const wanted = switch (h.state) {
                    .header => h.container.headerSize(),
                    .block_header => 5,
                    .block_body, .final_block_body => h.block_remaining,
                    .footer => h.container.footerSize(),
                    .end => 1,
                };

                if (wanted != 0) {
                    while (rem_data_elem.len == 0) {
                        rem_data_elem = rem_data[0];
                        if (rem_data.len != 1) {
                            rem_data = rem_data[1..];
                        } else {
                            if (rem_splat == 0) {
                                break :data_loop;
                            } else {
                                rem_splat -= 1;
                            }
                        }
                    }
                }

                const bytes = Io.Limit.limited(wanted).sliceConst(rem_data_elem);
                rem_data_elem = rem_data_elem[bytes.len..];

                switch (h.state) {
                    .header => {
                        if (bytes.len < wanted)
                            return error.WriteFailed; // header eos
                        if (!mem.eql(u8, bytes, h.container.header()))
                            return error.WriteFailed; // wrong header
                        h.state = .block_header;
                    },
                    .block_header => {
                        if (bytes.len < wanted)
                            return error.WriteFailed; // store block header eos
                        const header: BlockHeader = @bitCast(@as(u3, @truncate(bytes[0])));
                        if (header.kind != .stored)
                            return error.WriteFailed; // non-store block
                        const len = mem.readInt(u16, bytes[1..3], .little);
                        const nlen = mem.readInt(u16, bytes[3..5], .little);
                        if (nlen != ~len)
                            return error.WriteFailed; // wrong nlen
                        h.block_remaining = len;
                        h.state = if (!header.final) .block_body else .final_block_body;
                    },
                    .block_body, .final_block_body => {
                        h.data_hash.update(bytes);
                        h.data_size += bytes.len;
                        h.block_remaining -= @intCast(bytes.len);
                        if (h.block_remaining == 0) {
                            h.state = if (h.state != .final_block_body) .block_header else .footer;
                        }
                    },
                    .footer => {
                        if (bytes.len < wanted)
                            return error.WriteFailed; // footer eos
                        switch (h.container) {
                            .raw => {},
                            .gzip => {
                                h.footer_hash = mem.readInt(u32, bytes[0..4], .little);
                                h.footer_size = mem.readInt(u32, bytes[4..8], .little);
                            },
                            .zlib => {
                                h.footer_hash = mem.readInt(u32, bytes[0..4], .big);
                            },
                        }
                        h.state = .end;
                    },
                    .end => return error.WriteFailed, // data past end
                }
            }

            w.end = 0;
            return Writer.countSplat(data, splat);
        }

        fn flush(w: *Writer) Writer.Error!void {
            defer w.* = .failing; // Clears buffer even if state hasn't reached `end`
            _ = try @This().drain(w, &.{""}, 0);
        }
    };

    var in: Io.Reader = .fixed(input);
    const opts: packed struct(u19) {
        container: PackedContainer,
        buf_len: u17,
    } = @bitCast(in.takeLeb128(u19) catch 0);
    var output: HashedStoreWriter = .init(&.{}, opts.container.val());
    var r_buf: [2 * 65536]u8 = undefined;
    var r: Raw = try .init(
        &output.writer,
        r_buf[0 .. opts.buf_len +% flate.max_window_len],
        opts.container.val(),
    );

    var data_base: u18 = 0;
    var expected_hash: flate.Container.Hasher = .init(opts.container.val());
    var expected_size: u32 = 0;
    var vecs: [32][]const u8 = undefined;
    var vecs_n: usize = 0;

    while (in.seek != in.end) {
        const VecInfo = packed struct(u58) {
            output: bool,
            /// If set, `data_len` and `splat` are reinterpreted as `preserve_len`
            /// and `capacity` respectively, and `output` is treated as set.
            rebase: bool,
            block_aligning_len: bool,
            block_aligning_splat: bool,
            data_len: u18,
            splat: u18,
            data_off: u18,
        };
        var vec_info: VecInfo = @bitCast(in.takeLeb128(u58) catch |e| switch (e) {
            error.ReadFailed => unreachable,
            error.Overflow, error.EndOfStream => 0,
        });

        {
            const buffered = r.writer.buffered().len + countVec(vecs[0..vecs_n]);
            const to_align = mem.alignForwardAnyAlign(usize, buffered, Raw.max_block_size) - buffered;
            assert((buffered + to_align) % Raw.max_block_size == 0);

            if (vec_info.block_aligning_len) {
                vec_info.data_len = @intCast(to_align);
            } else if (vec_info.block_aligning_splat and vec_info.data_len != 0 and
                to_align % vec_info.data_len == 0)
            {
                vec_info.splat = @divExact(@as(u18, @intCast(to_align)), vec_info.data_len) -% 1;
            }
        }

        var splat = if (vec_info.output and !vec_info.rebase) vec_info.splat +% 1 else 1;
        add_vec: {
            if (vec_info.rebase) break :add_vec;
            if (expected_size +| math.mulWide(u18, vec_info.data_len, splat) >
                10 * (1 << 16))
            {
                // Skip this vector to avoid this test taking too long.
                // Ten maximum-sized blocks is chosen as the limit since it is two more
                // than the maximum the implementation can output in one drain.
                splat = 1;
                break :add_vec;
            }

            vecs[vecs_n] = data_buf[@min(
                data_base +% vec_info.data_off,
                data_buf.len - vec_info.data_len,
            )..][0..vec_info.data_len];

            data_base +%= vec_info.data_len +% 3; // extra 3 to help catch aliasing bugs

            for (0..splat) |_| expected_hash.update(vecs[vecs_n]);
            expected_size += @as(u32, @intCast(vecs[vecs_n].len)) * splat;
            vecs_n += 1;
        }

        const want_drain = vecs_n == vecs.len or vec_info.output or vec_info.rebase or
            in.seek == in.end;
        if (want_drain and vecs_n != 0) {
            try r.writer.writeSplatAll(vecs[0..vecs_n], splat);
            vecs_n = 0;
        } else assert(splat == 1);

        if (vec_info.rebase) {
            try r.writer.rebase(vec_info.data_len, @min(
                r.writer.buffer.len -| vec_info.data_len,
                vec_info.splat,
            ));
        }
    }

    try r.writer.flush();
    try output.writer.flush();

    try std.testing.expectEqual(.end, output.state);
    try std.testing.expectEqual(expected_size, output.data_size);
    switch (output.data_hash) {
        .raw => {},
        .gzip => |gz| {
            const expected_crc = expected_hash.gzip.crc.final();
            try std.testing.expectEqual(expected_crc, gz.crc.final());
            try std.testing.expectEqual(expected_crc, output.footer_hash);
            try std.testing.expectEqual(expected_size, output.footer_size);
        },
        .zlib => |zl| {
            const expected_adler = expected_hash.zlib.adler;
            try std.testing.expectEqual(expected_adler, zl.adler);
            try std.testing.expectEqual(expected_adler, output.footer_hash);
        },
    }
}

/// Only performs Huffman compression on the data; does no matching.
pub const Huffman = struct {
    writer: Writer,
    bit_writer: BitWriter,
    hasher: flate.Container.Hasher,

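    // Each token is a single literal byte here, so this also caps the number of bytes
    // emitted per block and keeps every entry of a block's frequency table within a u16.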
    const max_tokens: u16 = 65535 - 1; // one is reserved for EOF

    /// While there is no minimum buffer size, it is recommended
    /// to be at least `flate.max_window_len` to improve compression.
    ///
    /// It is asserted that `output` has a buffer capacity of more than 8 bytes.
    pub fn init(output: *Writer, buffer: []u8, container: flate.Container) Writer.Error!Huffman {
        assert(output.buffer.len > 8);

        try output.writeAll(container.header());
        return .{
            .writer = .{
                .buffer = buffer,
                .vtable = &.{
                    .drain = Huffman.drain,
                    .flush = Huffman.flush,
                    .rebase = Huffman.rebase,
                },
            },
            .bit_writer = .init(output),
            .hasher = .init(container),
        };
    }
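
    // Illustrative sketch only (not taken from this file): typical usage mirrors `Raw`,
    // with `sink` standing in for some backing `*Writer`:
    //
    //     var buf: [flate.max_window_len]u8 = undefined;
    //     var huff: Huffman = try .init(sink, &buf, .zlib);
    //     try huff.writer.writeAll(payload);
    //     try huff.writer.flush(); // writes the final block and the container footer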

    fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
        errdefer w.* = .failing;
        {
            //std.debug.print("drain {} (buffered)", .{w.buffered().len});
            //for (data) |d| std.debug.print("\n\t+ {}", .{d.len});
            //std.debug.print(" x {}\n\n", .{splat});
        }

        const h: *Huffman = @fieldParentPtr("writer", w);
        const min_block = @min(w.buffer.len, max_tokens);
        const pattern = data[data.len - 1];

        const data_bytes = Writer.countSplat(data, splat);
        const total_bytes = w.end + data_bytes;
        var rem_bytes = total_bytes;
        var rem_splat = splat;
        var rem_data = data;
        var rem_data_elem: []const u8 = w.buffered();

        assert(rem_bytes > min_block);
        while (rem_bytes > min_block) { // not >= to allow `min_block` blocks to be marked as final
            // also, it handles the case of `min_block` being zero (no buffer)
            const block_size: u16 = @min(rem_bytes, max_tokens);
            rem_bytes -= block_size;

            // Count frequencies
            comptime assert(max_tokens != 65535);
            var freqs: [257]u16 = @splat(0);
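            // Symbol 256 is the end-of-block marker; it occurs exactly once per block.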
            freqs[256] = 1;

            const start_splat = rem_splat;
            const start_data = rem_data;
            const start_data_elem = rem_data_elem;

            var block_limit: Io.Limit = .limited(block_size);
            while (true) {
                const bytes = block_limit.sliceConst(rem_data_elem);
                const is_pattern = rem_splat != splat and bytes.len == pattern.len;

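                // When the element being counted is the splat pattern, its byte frequencies
                // are counted once and scaled by the number of whole repetitions that still
                // fit in this block.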
                const mul = if (!is_pattern) 1 else @intFromEnum(block_limit) / pattern.len;
                assert(mul != 0);
                if (is_pattern) assert(mul <= rem_splat + 1); // one more for `rem_data`

                for (bytes) |b| freqs[b] += @intCast(mul);
                rem_data_elem = rem_data_elem[bytes.len..];
                block_limit = block_limit.subtract(bytes.len * mul).?;

                if (rem_data_elem.len == 0) {
                    rem_data_elem = rem_data[0];
                    if (rem_data.len != 1) {
                        rem_data = rem_data[1..];
                    } else if (rem_splat >= mul) {
                        // If the counted element was not the pattern, `mul` is always one;
                        // otherwise `mul` already accounts for `rem_data`, and one more needs
                        // to be subtracted anyway since the next pattern is also being taken.
                        rem_splat -= mul;
                    } else {
                        // All of `data` has been consumed.
                        assert(block_limit == .nothing);
                        assert(rem_bytes == 0);
                        // Since `rem_bytes` and `block_limit` are zero, these won't be used.
                        rem_data = undefined;
                        rem_data_elem = undefined;
                        rem_splat = undefined;
                    }
                }
                if (block_limit == .nothing) break;
            }

            // Output block
            rem_splat = start_splat;
            rem_data = start_data;
            rem_data_elem = start_data_elem;
            block_limit = .limited(block_size);

            var codes_buf: CodesBuf = .init;
            if (try h.outputHeader(&freqs, &codes_buf, block_size, false)) |table| {
                while (true) {
                    const bytes = block_limit.sliceConst(rem_data_elem);
                    rem_data_elem = rem_data_elem[bytes.len..];
                    block_limit = block_limit.subtract(bytes.len).?;

                    h.hasher.update(bytes);
                    for (bytes) |b| {
                        try h.bit_writer.write(table.codes[b], table.bits[b]);
                    }

                    if (rem_data_elem.len == 0) {
                        rem_data_elem = rem_data[0];
                        if (rem_data.len != 1) {
                            rem_data = rem_data[1..];
                        } else if (rem_splat != 0) {
                            rem_splat -= 1;
                        } else {
                            // All of `data` has been consumed.
                            assert(block_limit == .nothing);
                            assert(rem_bytes == 0);
                            // Since `rem_bytes` and `block_limit` are zero, these won't be used.
                            rem_data = undefined;
                            rem_data_elem = undefined;
                            rem_splat = undefined;
                        }
                    }
                    if (block_limit == .nothing) break;
                }
                try h.bit_writer.write(table.codes[256], table.bits[256]);
            } else while (true) {
                // Store block

                // Write data that is not a full vector element
                const in_pattern = rem_splat != splat;
                const vec_elem_i, const in_data =
                    @subWithOverflow(data.len - (rem_data.len - @intFromBool(in_pattern)), 1);
                const is_elem = in_data == 0 and data[vec_elem_i].len == rem_data_elem.len;
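                // `is_elem` means `rem_data_elem` is still an untouched element of the
                // caller's `data` vector, so it can be forwarded below as part of the
                // original vector instead of being written out separately here.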

                if (!is_elem or rem_data_elem.len > @intFromEnum(block_limit)) {
                    block_limit = block_limit.subtract(rem_data_elem.len) orelse {
                        try h.bit_writer.output.writeAll(rem_data_elem[0..@intFromEnum(block_limit)]);
                        h.hasher.update(rem_data_elem[0..@intFromEnum(block_limit)]);
                        rem_data_elem = rem_data_elem[@intFromEnum(block_limit)..];
                        assert(rem_data_elem.len != 0);
                        break;
                    };
                    try h.bit_writer.output.writeAll(rem_data_elem);
                    h.hasher.update(rem_data_elem);
                } else {
                    // Put `rem_data_elem` back in `rem_data`
                    if (!in_pattern) {
                        rem_data = data[vec_elem_i..];
                    } else {
                        rem_splat += 1;
                    }
                }
                rem_data_elem = undefined; // it is always updated below

                // Send through as much of the original vector as possible
                var vec_n: usize = 0;
                var vlimit = block_limit;
                const vec_splat = while (rem_data[vec_n..].len != 1) {
                    vlimit = vlimit.subtract(rem_data[vec_n].len) orelse break 1;
                    vec_n += 1;
                } else vec_splat: {
                    // For `pattern.len == 0`, the value of `vec_splat` does not matter.
                    const vec_splat = @intFromEnum(vlimit) / @max(1, pattern.len);
                    if (pattern.len != 0) assert(vec_splat <= rem_splat + 1);
                    vlimit = vlimit.subtract(pattern.len * vec_splat).?;
                    vec_n += 1;
                    break :vec_splat vec_splat;
                };

                const n = if (vec_n != 0) n: {
                    assert(@intFromEnum(block_limit) - @intFromEnum(vlimit) ==
                        Writer.countSplat(rem_data[0..vec_n], vec_splat));
                    break :n try h.bit_writer.output.writeSplat(rem_data[0..vec_n], vec_splat);
                } else 0; // Still go into the case below to advance the vector
                block_limit = block_limit.subtract(n).?;
                var consumed: Io.Limit = .limited(n);

                while (rem_data.len != 1) {
                    const elem = rem_data[0];
                    rem_data = rem_data[1..];
                    consumed = consumed.subtract(elem.len) orelse {
                        h.hasher.update(elem[0..@intFromEnum(consumed)]);
                        rem_data_elem = elem[@intFromEnum(consumed)..];
                        break;
                    };
                    h.hasher.update(elem);
                } else {
                    if (pattern.len == 0) {
                        // All of `data` has been consumed. However, the general
                        // case below does not work since it divides by zero.
                        assert(consumed == .nothing);
                        assert(block_limit == .nothing);
                        assert(rem_bytes == 0);
                        // Since `rem_bytes` and `block_limit` are zero, these won't be used.
                        rem_splat = undefined;
                        rem_data = undefined;
                        rem_data_elem = undefined;
                        break;
                    }

                    const splatted = @intFromEnum(consumed) / pattern.len;
                    const partial = @intFromEnum(consumed) % pattern.len;
                    for (0..splatted) |_| h.hasher.update(pattern);
                    h.hasher.update(pattern[0..partial]);

                    const taken_splat = splatted + 1;
                    if (rem_splat >= taken_splat) {
                        rem_splat -= taken_splat;
                        rem_data_elem = pattern[partial..];
                    } else {
                        // All of `data` has been consumed.
                        assert(partial == 0);
                        assert(block_limit == .nothing);
                        assert(rem_bytes == 0);
                        // Since `rem_bytes` and `block_limit` are zero, these won't be used.
                        rem_data = undefined;
                        rem_data_elem = undefined;
                        rem_splat = undefined;
                    }
                }

                if (block_limit == .nothing) break;
            }
        }

        if (rem_bytes > data_bytes) {
            assert(rem_bytes - data_bytes == rem_data_elem.len);
            assert(&rem_data_elem[0] == &w.buffer[total_bytes - rem_bytes]);
        }
        return w.consume(total_bytes - rem_bytes);
    }

    fn flush(w: *Writer) Writer.Error!void {
        defer w.* = .failing;
        const h: *Huffman = @fieldParentPtr("writer", w);
        try Huffman.rebaseInner(w, 0, w.buffer.len, true);
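        // Make room for the partial byte, flush it so the stream is byte-aligned, then
        // append the container footer directly to the backing writer.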
        try h.bit_writer.output.rebase(0, 1);
        h.bit_writer.byteAlign();
        try h.hasher.writeFooter(h.bit_writer.output);
    }

    fn rebase(w: *Writer, preserve: usize, capacity: usize) Writer.Error!void {
        errdefer w.* = .failing;
        try Huffman.rebaseInner(w, preserve, capacity, false);
    }

    fn rebaseInner(w: *Writer, preserve: usize, capacity: usize, eos: bool) Writer.Error!void {
        const h: *Huffman = @fieldParentPtr("writer", w);
        assert(preserve + capacity <= w.buffer.len);
        if (eos) assert(capacity == w.buffer.len);

        const preserved = @min(w.end, preserve);
        var remaining = w.buffer[0 .. w.end - preserved];
        while (remaining.len > max_tokens) { // not >= so there is always a block down below
            const bytes = remaining[0..max_tokens];
            remaining = remaining[max_tokens..];
            try h.outputBytes(bytes, false);
        }

        // eos check required for empty block
        if (w.buffer.len - (remaining.len + preserved) < capacity or eos) {
            const bytes = remaining;
            remaining = &.{};
            try h.outputBytes(bytes, eos);
        }

        _ = w.consume(w.end - preserved - remaining.len);
    }

    fn outputBytes(h: *Huffman, bytes: []const u8, eos: bool) Writer.Error!void {
        comptime assert(max_tokens != 65535);
        assert(bytes.len <= max_tokens);
        var freqs: [257]u16 = @splat(0);
        freqs[256] = 1;
        for (bytes) |b| freqs[b] += 1;
        h.hasher.update(bytes);

        var codes_buf: CodesBuf = .init;
        if (try h.outputHeader(&freqs, &codes_buf, @intCast(bytes.len), eos)) |table| {
            for (bytes) |b| {
                try h.bit_writer.write(table.codes[b], table.bits[b]);
            }
            try h.bit_writer.write(table.codes[256], table.bits[256]);
        } else {
            try h.bit_writer.output.writeAll(bytes);
        }
    }

    const CodesBuf = struct {
        dyn_codes: [258]u16,
        dyn_bits: [258]u4,

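        // Index 257 describes the single distance code implied by `hdist = 0`; RFC 1951
        // still assigns a lone distance code a length of one bit.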
        pub const init: CodesBuf = .{
            .dyn_codes = @as([257]u16, undefined) ++ .{0},
            .dyn_bits = @as([257]u4, @splat(0)) ++ .{1},
        };
    };

    /// Returns null if the block is stored.
    fn outputHeader(
        h: *Huffman,
        freqs: *const [257]u16,
        buf: *CodesBuf,
        bytes: u16,
        eos: bool,
    ) Writer.Error!?struct {
        codes: *const [257]u16,
        bits: *const [257]u4,
    } {
        assert(freqs[256] == 1);
        const dyn_codes_bitsize, _ = huffman.build(
            freqs,
            buf.dyn_codes[0..257],
            buf.dyn_bits[0..257],
            15,
            true,
        );

        var clen_values: [258]u8 = undefined;
        var clen_extra: [258]u8 = undefined;
        var clen_freqs: [19]u16 = @splat(0);
        const clen_len, const clen_extra_bitsize = buildClen(
            &buf.dyn_bits,
            &clen_values,
            &clen_extra,
            &clen_freqs,
        );

        var clen_codes: [19]u16 = undefined;
        var clen_bits: [19]u4 = @splat(0);
        const clen_codes_bitsize, _ = huffman.build(
            &clen_freqs,
            &clen_codes,
            &clen_bits,
            7,
            false,
        );
        const hclen = clenHlen(clen_freqs);

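        // 14 = 5 bits HLIT + 5 bits HDIST + 4 bits HCLEN; the code length code lengths
        // are (hclen + 4) entries of 3 bits each.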
        const dynamic_bitsize = @as(u32, 14) +
            (4 + @as(u6, hclen)) * 3 + clen_codes_bitsize + clen_extra_bitsize +
            dyn_codes_bitsize;
        const fixed_bitsize = n: {
            const freq7 = 1; // eos
            var freq9: u16 = 0;
            for (freqs[144..256]) |f| freq9 += f;
            const freq8: u16 = bytes - freq9;
            break :n @as(u32, freq7) * 7 + @as(u32, freq8) * 8 + @as(u32, freq9) * 9;
        };
        const stored_bitsize = n: {
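            // Bits of padding to the next byte boundary after the 3-bit block header
            // (u3 arithmetic wraps modulo 8), then LEN/NLEN (32 bits) plus the raw bytes.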
            const stored_align_bits = -%(h.bit_writer.buffered_n +% 3);
            break :n stored_align_bits + @as(u32, 32) + @as(u32, bytes) * 8;
        };

        //std.debug.print("@ {}{{{}}} ", .{ h.bit_writer.output.end, h.bit_writer.buffered_n });
        //std.debug.print("#{} -> s {} f {} d {}\n", .{ bytes, stored_bitsize, fixed_bitsize, dynamic_bitsize });

        if (stored_bitsize <= @min(dynamic_bitsize, fixed_bitsize)) {
            try h.bit_writer.write(BlockHeader.int(.{ .kind = .stored, .final = eos }), 3);
            try h.bit_writer.output.rebase(0, 5);
            h.bit_writer.byteAlign();
            h.bit_writer.output.writeInt(u16, bytes, .little) catch unreachable;
            h.bit_writer.output.writeInt(u16, ~bytes, .little) catch unreachable;
            return null;
        }

        if (fixed_bitsize <= dynamic_bitsize) {
            try h.bit_writer.write(BlockHeader.int(.{ .final = eos, .kind = .fixed }), 3);
            return .{
                .codes = token.fixed_lit_codes[0..257],
                .bits = token.fixed_lit_bits[0..257],
            };
        } else {
            try h.bit_writer.write(BlockHeader.Dynamic.int(.{
                .regular = .{ .final = eos, .kind = .dynamic },
                .hlit = 0,
                .hdist = 0,
                .hclen = hclen,
            }), 17);
            try h.bit_writer.writeClen(
                hclen,
                clen_values[0..clen_len],
                clen_extra[0..clen_len],
                clen_codes,
                clen_bits,
            );
            return .{ .codes = buf.dyn_codes[0..257], .bits = buf.dyn_bits[0..257] };
        }
    }
};

test Huffman {
    const fbufs = try testingFreqBufs();
    defer if (!builtin.fuzz) std.testing.allocator.destroy(fbufs);
    try std.testing.fuzz(fbufs, testFuzzedHuffmanInput, .{});
}

/// This function is derived from `testFuzzedRawInput` with a few changes for fuzzing `Huffman`.
fn testFuzzedHuffmanInput(fbufs: *const [2][65536]u8, input: []const u8) !void {
    var in: Io.Reader = .fixed(input);
    const opts: packed struct(u19) {
        container: PackedContainer,
        buf_len: u17,
    } = @bitCast(in.takeLeb128(u19) catch 0);
    var flate_buf: [2 * 65536]u8 = undefined;
    var flate_w: Writer = .fixed(&flate_buf);
    var h_buf: [2 * 65536]u8 = undefined;
    var h: Huffman = try .init(
        &flate_w,
        h_buf[0 .. opts.buf_len +% flate.max_window_len],
        opts.container.val(),
    );

    var expected_hash: flate.Container.Hasher = .init(opts.container.val());
    var expected_size: u32 = 0;
    var vecs: [32][]const u8 = undefined;
    var vecs_n: usize = 0;

    while (in.seek != in.end) {
        const VecInfo = packed struct(u55) {
            output: bool,
            /// If set, `data_len` and `splat` are reinterpreted as `preserve_len`
            /// and `capacity` respectively, and `output` is treated as set.
            rebase: bool,
            block_aligning_len: bool,
            block_aligning_splat: bool,
            data_off_hi: u8,
            random_data: u1,
            data_len: u16,
            splat: u18,
            /// This is less useful as each value is part of the same gradient 'step'
            data_off_lo: u8,
        };
        var vec_info: VecInfo = @bitCast(in.takeLeb128(u55) catch |e| switch (e) {
            error.ReadFailed => unreachable,
            error.Overflow, error.EndOfStream => 0,
        });

        {
            const buffered = h.writer.buffered().len + countVec(vecs[0..vecs_n]);
            const to_align = mem.alignForwardAnyAlign(usize, buffered, Huffman.max_tokens) - buffered;
            assert((buffered + to_align) % Huffman.max_tokens == 0);

            if (vec_info.block_aligning_len) {
                vec_info.data_len = @intCast(to_align);
            } else if (vec_info.block_aligning_splat and vec_info.data_len != 0 and
                to_align % vec_info.data_len == 0)
            {
                vec_info.splat = @divExact(@as(u18, @intCast(to_align)), vec_info.data_len) -% 1;
            }
        }

        var splat = if (vec_info.output and !vec_info.rebase) vec_info.splat +% 1 else 1;
        add_vec: {
            if (vec_info.rebase) break :add_vec;
            if (expected_size +| math.mulWide(u18, vec_info.data_len, splat) > 4 * (1 << 16)) {
                // Skip this vector to avoid this test taking too long.
                splat = 1;
                break :add_vec;
            }

            const data_buf = &fbufs[vec_info.random_data];
            vecs[vecs_n] = data_buf[@min(
                (@as(u16, vec_info.data_off_hi) << 8) | vec_info.data_off_lo,
                data_buf.len - vec_info.data_len,
            )..][0..vec_info.data_len];

            for (0..splat) |_| expected_hash.update(vecs[vecs_n]);
            expected_size += @as(u32, @intCast(vecs[vecs_n].len)) * splat;
            vecs_n += 1;
        }

        const want_drain = vecs_n == vecs.len or vec_info.output or vec_info.rebase or
            in.seek == in.end;
        if (want_drain and vecs_n != 0) {
            const old_end = flate_w.end;
            var n = h.writer.buffered().len + Writer.countSplat(vecs[0..vecs_n], splat);
            const oos = h.writer.writeSplatAll(vecs[0..vecs_n], splat) == error.WriteFailed;
            n -= h.writer.buffered().len;
            const block_lim = math.divCeil(usize, n, Huffman.max_tokens) catch unreachable;
            const lim = old_end + 6 * block_lim + n; // 6 since block header may span two bytes
            if (flate_w.end > lim) return error.OverheadTooLarge;
            if (oos) return;

            vecs_n = 0;
        } else assert(splat == 1);

        if (vec_info.rebase) {
            const old_end = flate_w.end;
            var n = h.writer.buffered().len;
            const oos = h.writer.rebase(vec_info.data_len, @min(
                h.writer.buffer.len -| vec_info.data_len,
                vec_info.splat,
            )) == error.WriteFailed;
            n -= h.writer.buffered().len;
            const block_lim = math.divCeil(usize, n, Huffman.max_tokens) catch unreachable;
            const lim = old_end + 6 * block_lim + n; // 6 since block header may span two bytes
            if (flate_w.end > lim) return error.OverheadTooLarge;
            if (oos) return;
        }
    }

    {
        const old_end = flate_w.end;
        const n = h.writer.buffered().len;
        const oos = h.writer.flush() == error.WriteFailed;
        assert(h.writer.buffered().len == 0);
        const block_lim = @max(1, math.divCeil(usize, n, Huffman.max_tokens) catch unreachable);
        const lim = old_end + 6 * block_lim + n + opts.container.val().footerSize();
        if (flate_w.end > lim) return error.OverheadTooLarge;
        if (oos) return;
    }

    try testingCheckDecompressedMatches(flate_w.buffered(), expected_size, expected_hash);
}