Commit f50c647977
Changed files (8)
lib/std/compress/flate/BlockWriter.zig
@@ -1,591 +0,0 @@
-//! Accepts list of tokens, decides what is best block type to write. What block
-//! type will provide best compression. Writes header and body of the block.
-const std = @import("std");
-const assert = std.debug.assert;
-const Writer = std.Io.Writer;
-
-const BlockWriter = @This();
-const flate = @import("../flate.zig");
-const Compress = flate.Compress;
-const HuffmanEncoder = flate.HuffmanEncoder;
-const Token = @import("Token.zig");
-
-const codegen_order = HuffmanEncoder.codegen_order;
-const end_code_mark = 255;
-
-output: *Writer,
-
-codegen_freq: [HuffmanEncoder.codegen_code_count]u16,
-literal_freq: [HuffmanEncoder.max_num_lit]u16,
-distance_freq: [HuffmanEncoder.distance_code_count]u16,
-codegen: [HuffmanEncoder.max_num_lit + HuffmanEncoder.distance_code_count + 1]u8,
-literal_encoding: HuffmanEncoder,
-distance_encoding: HuffmanEncoder,
-codegen_encoding: HuffmanEncoder,
-fixed_literal_encoding: HuffmanEncoder,
-fixed_distance_encoding: HuffmanEncoder,
-huff_distance: HuffmanEncoder,
-
-fixed_literal_codes: [HuffmanEncoder.max_num_frequencies]HuffmanEncoder.Code,
-fixed_distance_codes: [HuffmanEncoder.distance_code_count]HuffmanEncoder.Code,
-distance_codes: [HuffmanEncoder.distance_code_count]HuffmanEncoder.Code,
-
-pub fn init(output: *Writer) BlockWriter {
- return .{
- .output = output,
- .codegen_freq = undefined,
- .literal_freq = undefined,
- .distance_freq = undefined,
- .codegen = undefined,
- .literal_encoding = undefined,
- .distance_encoding = undefined,
- .codegen_encoding = undefined,
- .fixed_literal_encoding = undefined,
- .fixed_distance_encoding = undefined,
- .huff_distance = undefined,
- .fixed_literal_codes = undefined,
- .fixed_distance_codes = undefined,
- .distance_codes = undefined,
- };
-}
-
-pub fn initBuffers(bw: *BlockWriter) void {
- bw.fixed_literal_encoding = .fixedLiteralEncoder(&bw.fixed_literal_codes);
- bw.fixed_distance_encoding = .fixedDistanceEncoder(&bw.fixed_distance_codes);
- bw.huff_distance = .huffmanDistanceEncoder(&bw.distance_codes);
-}
-
-/// Flush intrenal bit buffer to the writer.
-/// Should be called only when bit stream is at byte boundary.
-///
-/// That is after final block; when last byte could be incomplete or
-/// after stored block; which is aligned to the byte boundary (it has x
-/// padding bits after first 3 bits).
-pub fn flush(self: *BlockWriter) Writer.Error!void {
- try self.bit_writer.flush();
-}
-
-fn writeCode(self: *BlockWriter, c: Compress.HuffCode) Writer.Error!void {
- try self.bit_writer.writeBits(c.code, c.len);
-}
-
-/// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
-/// the literal and distance lengths arrays (which are concatenated into a single
-/// array). This method generates that run-length encoding.
-///
-/// The result is written into the codegen array, and the frequencies
-/// of each code is written into the codegen_freq array.
-/// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
-/// information. Code bad_code is an end marker
-///
-/// num_literals: The number of literals in literal_encoding
-/// num_distances: The number of distances in distance_encoding
-/// lit_enc: The literal encoder to use
-/// dist_enc: The distance encoder to use
-fn generateCodegen(
- self: *BlockWriter,
- num_literals: u32,
- num_distances: u32,
- lit_enc: *Compress.LiteralEncoder,
- dist_enc: *Compress.DistanceEncoder,
-) void {
- for (self.codegen_freq, 0..) |_, i| {
- self.codegen_freq[i] = 0;
- }
-
- // Note that we are using codegen both as a temporary variable for holding
- // a copy of the frequencies, and as the place where we put the result.
- // This is fine because the output is always shorter than the input used
- // so far.
- var codegen = &self.codegen; // cache
- // Copy the concatenated code sizes to codegen. Put a marker at the end.
- var cgnl = codegen[0..num_literals];
- for (cgnl, 0..) |_, i| {
- cgnl[i] = @as(u8, @intCast(lit_enc.codes[i].len));
- }
-
- cgnl = codegen[num_literals .. num_literals + num_distances];
- for (cgnl, 0..) |_, i| {
- cgnl[i] = @as(u8, @intCast(dist_enc.codes[i].len));
- }
- codegen[num_literals + num_distances] = end_code_mark;
-
- var size = codegen[0];
- var count: i32 = 1;
- var out_index: u32 = 0;
- var in_index: u32 = 1;
- while (size != end_code_mark) : (in_index += 1) {
- // INVARIANT: We have seen "count" copies of size that have not yet
- // had output generated for them.
- const next_size = codegen[in_index];
- if (next_size == size) {
- count += 1;
- continue;
- }
- // We need to generate codegen indicating "count" of size.
- if (size != 0) {
- codegen[out_index] = size;
- out_index += 1;
- self.codegen_freq[size] += 1;
- count -= 1;
- while (count >= 3) {
- var n: i32 = 6;
- if (n > count) {
- n = count;
- }
- codegen[out_index] = 16;
- out_index += 1;
- codegen[out_index] = @as(u8, @intCast(n - 3));
- out_index += 1;
- self.codegen_freq[16] += 1;
- count -= n;
- }
- } else {
- while (count >= 11) {
- var n: i32 = 138;
- if (n > count) {
- n = count;
- }
- codegen[out_index] = 18;
- out_index += 1;
- codegen[out_index] = @as(u8, @intCast(n - 11));
- out_index += 1;
- self.codegen_freq[18] += 1;
- count -= n;
- }
- if (count >= 3) {
- // 3 <= count <= 10
- codegen[out_index] = 17;
- out_index += 1;
- codegen[out_index] = @as(u8, @intCast(count - 3));
- out_index += 1;
- self.codegen_freq[17] += 1;
- count = 0;
- }
- }
- count -= 1;
- while (count >= 0) : (count -= 1) {
- codegen[out_index] = size;
- out_index += 1;
- self.codegen_freq[size] += 1;
- }
- // Set up invariant for next time through the loop.
- size = next_size;
- count = 1;
- }
- // Marker indicating the end of the codegen.
- codegen[out_index] = end_code_mark;
-}
-
-const DynamicSize = struct {
- size: u32,
- num_codegens: u32,
-};
-
-/// dynamicSize returns the size of dynamically encoded data in bits.
-fn dynamicSize(
- self: *BlockWriter,
- lit_enc: *Compress.LiteralEncoder, // literal encoder
- dist_enc: *Compress.DistanceEncoder, // distance encoder
- extra_bits: u32,
-) DynamicSize {
- var num_codegens = self.codegen_freq.len;
- while (num_codegens > 4 and self.codegen_freq[codegen_order[num_codegens - 1]] == 0) {
- num_codegens -= 1;
- }
- const header = 3 + 5 + 5 + 4 + (3 * num_codegens) +
- self.codegen_encoding.bitLength(self.codegen_freq[0..]) +
- self.codegen_freq[16] * 2 +
- self.codegen_freq[17] * 3 +
- self.codegen_freq[18] * 7;
- const size = header +
- lit_enc.bitLength(&self.literal_freq) +
- dist_enc.bitLength(&self.distance_freq) +
- extra_bits;
-
- return DynamicSize{
- .size = @as(u32, @intCast(size)),
- .num_codegens = @as(u32, @intCast(num_codegens)),
- };
-}
-
-/// fixedSize returns the size of dynamically encoded data in bits.
-fn fixedSize(self: *BlockWriter, extra_bits: u32) u32 {
- return 3 +
- self.fixed_literal_encoding.bitLength(&self.literal_freq) +
- self.fixed_distance_encoding.bitLength(&self.distance_freq) +
- extra_bits;
-}
-
-const StoredSize = struct {
- size: u32,
- storable: bool,
-};
-
-/// storedSizeFits calculates the stored size, including header.
-/// The function returns the size in bits and whether the block
-/// fits inside a single block.
-fn storedSizeFits(in: ?[]const u8) StoredSize {
- if (in == null) {
- return .{ .size = 0, .storable = false };
- }
- if (in.?.len <= HuffmanEncoder.max_store_block_size) {
- return .{ .size = @as(u32, @intCast((in.?.len + 5) * 8)), .storable = true };
- }
- return .{ .size = 0, .storable = false };
-}
-
-/// Write the header of a dynamic Huffman block to the output stream.
-///
-/// num_literals: The number of literals specified in codegen
-/// num_distances: The number of distances specified in codegen
-/// num_codegens: The number of codegens used in codegen
-/// eof: Is it the end-of-file? (end of stream)
-fn dynamicHeader(
- self: *BlockWriter,
- num_literals: u32,
- num_distances: u32,
- num_codegens: u32,
- eof: bool,
-) Writer.Error!void {
- const first_bits: u32 = if (eof) 5 else 4;
- try self.bit_writer.writeBits(first_bits, 3);
- try self.bit_writer.writeBits(num_literals - 257, 5);
- try self.bit_writer.writeBits(num_distances - 1, 5);
- try self.bit_writer.writeBits(num_codegens - 4, 4);
-
- var i: u32 = 0;
- while (i < num_codegens) : (i += 1) {
- const value = self.codegen_encoding.codes[codegen_order[i]].len;
- try self.bit_writer.writeBits(value, 3);
- }
-
- i = 0;
- while (true) {
- const code_word: u32 = @as(u32, @intCast(self.codegen[i]));
- i += 1;
- if (code_word == end_code_mark) {
- break;
- }
- try self.writeCode(self.codegen_encoding.codes[@as(u32, @intCast(code_word))]);
-
- switch (code_word) {
- 16 => {
- try self.bit_writer.writeBits(self.codegen[i], 2);
- i += 1;
- },
- 17 => {
- try self.bit_writer.writeBits(self.codegen[i], 3);
- i += 1;
- },
- 18 => {
- try self.bit_writer.writeBits(self.codegen[i], 7);
- i += 1;
- },
- else => {},
- }
- }
-}
-
-fn storedHeader(self: *BlockWriter, length: usize, eof: bool) Writer.Error!void {
- assert(length <= 65535);
- const flag: u32 = if (eof) 1 else 0;
- try self.bit_writer.writeBits(flag, 3);
- try self.flush();
- const l: u16 = @intCast(length);
- try self.bit_writer.writeBits(l, 16);
- try self.bit_writer.writeBits(~l, 16);
-}
-
-fn fixedHeader(self: *BlockWriter, eof: bool) Writer.Error!void {
- // Indicate that we are a fixed Huffman block
- var value: u32 = 2;
- if (eof) {
- value = 3;
- }
- try self.bit_writer.writeBits(value, 3);
-}
-
-/// Write a block of tokens with the smallest encoding. Will choose block type.
-/// The original input can be supplied, and if the huffman encoded data
-/// is larger than the original bytes, the data will be written as a
-/// stored block.
-/// If the input is null, the tokens will always be Huffman encoded.
-pub fn write(self: *BlockWriter, tokens: []const Token, eof: bool, input: ?[]const u8) Writer.Error!void {
- const lit_and_dist = self.indexTokens(tokens);
- const num_literals = lit_and_dist.num_literals;
- const num_distances = lit_and_dist.num_distances;
-
- var extra_bits: u32 = 0;
- const ret = storedSizeFits(input);
- const stored_size = ret.size;
- const storable = ret.storable;
-
- if (storable) {
- // We only bother calculating the costs of the extra bits required by
- // the length of distance fields (which will be the same for both fixed
- // and dynamic encoding), if we need to compare those two encodings
- // against stored encoding.
- var length_code: u16 = Token.length_codes_start + 8;
- while (length_code < num_literals) : (length_code += 1) {
- // First eight length codes have extra size = 0.
- extra_bits += @as(u32, @intCast(self.literal_freq[length_code])) *
- @as(u32, @intCast(Token.lengthExtraBits(length_code)));
- }
- var distance_code: u16 = 4;
- while (distance_code < num_distances) : (distance_code += 1) {
- // First four distance codes have extra size = 0.
- extra_bits += @as(u32, @intCast(self.distance_freq[distance_code])) *
- @as(u32, @intCast(Token.distanceExtraBits(distance_code)));
- }
- }
-
- // Figure out smallest code.
- // Fixed Huffman baseline.
- var literal_encoding = &self.fixed_literal_encoding;
- var distance_encoding = &self.fixed_distance_encoding;
- var size = self.fixedSize(extra_bits);
-
- // Dynamic Huffman?
- var num_codegens: u32 = 0;
-
- // Generate codegen and codegenFrequencies, which indicates how to encode
- // the literal_encoding and the distance_encoding.
- self.generateCodegen(
- num_literals,
- num_distances,
- &self.literal_encoding,
- &self.distance_encoding,
- );
- self.codegen_encoding.generate(self.codegen_freq[0..], 7);
- const dynamic_size = self.dynamicSize(
- &self.literal_encoding,
- &self.distance_encoding,
- extra_bits,
- );
- const dyn_size = dynamic_size.size;
- num_codegens = dynamic_size.num_codegens;
-
- if (dyn_size < size) {
- size = dyn_size;
- literal_encoding = &self.literal_encoding;
- distance_encoding = &self.distance_encoding;
- }
-
- // Stored bytes?
- if (storable and stored_size < size) {
- try self.storedBlock(input.?, eof);
- return;
- }
-
- // Huffman.
- if (@intFromPtr(literal_encoding) == @intFromPtr(&self.fixed_literal_encoding)) {
- try self.fixedHeader(eof);
- } else {
- try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
- }
-
- // Write the tokens.
- try self.writeTokens(tokens, &literal_encoding.codes, &distance_encoding.codes);
-}
-
-pub fn storedBlock(self: *BlockWriter, input: []const u8, eof: bool) Writer.Error!void {
- try self.storedHeader(input.len, eof);
- try self.bit_writer.writeBytes(input);
-}
-
-/// writeBlockDynamic encodes a block using a dynamic Huffman table.
-/// This should be used if the symbols used have a disproportionate
-/// histogram distribution.
-/// If input is supplied and the compression savings are below 1/16th of the
-/// input size the block is stored.
-fn dynamicBlock(
- self: *BlockWriter,
- tokens: []const Token,
- eof: bool,
- input: ?[]const u8,
-) Writer.Error!void {
- const total_tokens = self.indexTokens(tokens);
- const num_literals = total_tokens.num_literals;
- const num_distances = total_tokens.num_distances;
-
- // Generate codegen and codegenFrequencies, which indicates how to encode
- // the literal_encoding and the distance_encoding.
- self.generateCodegen(
- num_literals,
- num_distances,
- &self.literal_encoding,
- &self.distance_encoding,
- );
- self.codegen_encoding.generate(self.codegen_freq[0..], 7);
- const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.distance_encoding, 0);
- const size = dynamic_size.size;
- const num_codegens = dynamic_size.num_codegens;
-
- // Store bytes, if we don't get a reasonable improvement.
-
- const stored_size = storedSizeFits(input);
- const ssize = stored_size.size;
- const storable = stored_size.storable;
- if (storable and ssize < (size + (size >> 4))) {
- try self.storedBlock(input.?, eof);
- return;
- }
-
- // Write Huffman table.
- try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
-
- // Write the tokens.
- try self.writeTokens(tokens, &self.literal_encoding.codes, &self.distance_encoding.codes);
-}
-
-const TotalIndexedTokens = struct {
- num_literals: u32,
- num_distances: u32,
-};
-
-/// Indexes a slice of tokens followed by an end_block_marker, and updates
-/// literal_freq and distance_freq, and generates literal_encoding
-/// and distance_encoding.
-/// The number of literal and distance tokens is returned.
-fn indexTokens(self: *BlockWriter, tokens: []const Token) TotalIndexedTokens {
- var num_literals: u32 = 0;
- var num_distances: u32 = 0;
-
- for (self.literal_freq, 0..) |_, i| {
- self.literal_freq[i] = 0;
- }
- for (self.distance_freq, 0..) |_, i| {
- self.distance_freq[i] = 0;
- }
-
- for (tokens) |t| {
- if (t.kind == Token.Kind.literal) {
- self.literal_freq[t.literal()] += 1;
- continue;
- }
- self.literal_freq[t.lengthCode()] += 1;
- self.distance_freq[t.distanceCode()] += 1;
- }
- // add end_block_marker token at the end
- self.literal_freq[HuffmanEncoder.end_block_marker] += 1;
-
- // get the number of literals
- num_literals = @as(u32, @intCast(self.literal_freq.len));
- while (self.literal_freq[num_literals - 1] == 0) {
- num_literals -= 1;
- }
- // get the number of distances
- num_distances = @as(u32, @intCast(self.distance_freq.len));
- while (num_distances > 0 and self.distance_freq[num_distances - 1] == 0) {
- num_distances -= 1;
- }
- if (num_distances == 0) {
- // We haven't found a single match. If we want to go with the dynamic encoding,
- // we should count at least one distance to be sure that the distance huffman tree could be encoded.
- self.distance_freq[0] = 1;
- num_distances = 1;
- }
- self.literal_encoding.generate(&self.literal_freq, 15);
- self.distance_encoding.generate(&self.distance_freq, 15);
- return TotalIndexedTokens{
- .num_literals = num_literals,
- .num_distances = num_distances,
- };
-}
-
-/// Writes a slice of tokens to the output followed by and end_block_marker.
-/// codes for literal and distance encoding must be supplied.
-fn writeTokens(
- self: *BlockWriter,
- tokens: []const Token,
- le_codes: []Compress.HuffCode,
- oe_codes: []Compress.HuffCode,
-) Writer.Error!void {
- for (tokens) |t| {
- if (t.kind == Token.Kind.literal) {
- try self.writeCode(le_codes[t.literal()]);
- continue;
- }
-
- // Write the length
- const le = t.lengthEncoding();
- try self.writeCode(le_codes[le.code]);
- if (le.extra_bits > 0) {
- try self.bit_writer.writeBits(le.extra_length, le.extra_bits);
- }
-
- // Write the distance
- const oe = t.distanceEncoding();
- try self.writeCode(oe_codes[oe.code]);
- if (oe.extra_bits > 0) {
- try self.bit_writer.writeBits(oe.extra_distance, oe.extra_bits);
- }
- }
- // add end_block_marker at the end
- try self.writeCode(le_codes[HuffmanEncoder.end_block_marker]);
-}
-
-/// Encodes a block of bytes as either Huffman encoded literals or uncompressed bytes
-/// if the results only gains very little from compression.
-pub fn huffmanBlock(self: *BlockWriter, input: []const u8, eof: bool) Writer.Error!void {
- // Add everything as literals
- histogram(input, &self.literal_freq);
-
- self.literal_freq[HuffmanEncoder.end_block_marker] = 1;
-
- const num_literals = HuffmanEncoder.end_block_marker + 1;
- self.distance_freq[0] = 1;
- const num_distances = 1;
-
- self.literal_encoding.generate(&self.literal_freq, 15);
-
- // Figure out smallest code.
- // Always use dynamic Huffman or Store
- var num_codegens: u32 = 0;
-
- // Generate codegen and codegenFrequencies, which indicates how to encode
- // the literal_encoding and the distance_encoding.
- self.generateCodegen(
- num_literals,
- num_distances,
- &self.literal_encoding,
- &self.huff_distance,
- );
- self.codegen_encoding.generate(self.codegen_freq[0..], 7);
- const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.huff_distance, 0);
- const size = dynamic_size.size;
- num_codegens = dynamic_size.num_codegens;
-
- // Store bytes, if we don't get a reasonable improvement.
- const stored_size_ret = storedSizeFits(input);
- const ssize = stored_size_ret.size;
- const storable = stored_size_ret.storable;
-
- if (storable and ssize < (size + (size >> 4))) {
- try self.storedBlock(input, eof);
- return;
- }
-
- // Huffman.
- try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
- const encoding = self.literal_encoding.codes[0..257];
-
- for (input) |t| {
- const c = encoding[t];
- try self.bit_writer.writeBits(c.code, c.len);
- }
- try self.writeCode(encoding[HuffmanEncoder.end_block_marker]);
-}
-
-fn histogram(b: []const u8, h: *[286]u16) void {
- // Clear histogram
- for (h, 0..) |_, i| {
- h[i] = 0;
- }
-
- var lh = h.*[0..256];
- for (b) |t| {
- lh[t] += 1;
- }
-}
lib/std/compress/flate/Compress.zig
@@ -1,332 +1,2550 @@
-//! Default compression algorithm. Has two steps: tokenization and token
-//! encoding.
+//! Allocates statically ~224K (128K lookup, 96K tokens).
//!
-//! Tokenization takes uncompressed input stream and produces list of tokens.
-//! Each token can be literal (byte of data) or match (backrefernce to previous
-//! data with length and distance). Tokenization accumulators 32K tokens, when
-//! full or `flush` is called tokens are passed to the `block_writer`. Level
-//! defines how hard (how slow) it tries to find match.
-//!
-//! Block writer will decide which type of deflate block to write (stored, fixed,
-//! dynamic) and encode tokens to the output byte stream. Client has to call
-//! `finish` to write block with the final bit set.
-//!
-//! Container defines type of header and footer which can be gzip, zlib or raw.
-//! They all share same deflate body. Raw has no header or footer just deflate
-//! body.
-//!
-//! Compression algorithm explained in rfc-1951 (slightly edited for this case):
-//!
-//! The compressor uses a chained hash table `lookup` to find duplicated
-//! strings, using a hash function that operates on 4-byte sequences. At any
-//! given point during compression, let XYZW be the next 4 input bytes
-//! (lookahead) to be examined (not necessarily all different, of course).
-//! First, the compressor examines the hash chain for XYZW. If the chain is
-//! empty, the compressor simply writes out X as a literal byte and advances
-//! one byte in the input. If the hash chain is not empty, indicating that the
-//! sequence XYZW (or, if we are unlucky, some other 4 bytes with the same
-//! hash function value) has occurred recently, the compressor compares all
-//! strings on the XYZW hash chain with the actual input data sequence
-//! starting at the current point, and selects the longest match.
-//!
-//! To improve overall compression, the compressor defers the selection of
-//! matches ("lazy matching"): after a match of length N has been found, the
-//! compressor searches for a longer match starting at the next input byte. If
-//! it finds a longer match, it truncates the previous match to a length of
-//! one (thus producing a single literal byte) and then emits the longer
-//! match. Otherwise, it emits the original match, and, as described above,
-//! advances N bytes before continuing.
-//!
-//!
-//! Allocates statically ~400K (192K lookup, 128K tokens, 64K window).
+//! The source of an `error.WriteFailed` is always the backing writer. After an
+//! `error.WriteFailed`, the `.writer` becomes `.failing` and is unrecoverable.
+//! After a `flush`, the writer also becomes `.failing` since the stream has
+//! been finished. This behavior also applies to `Raw` and `Huffman`.
+
+// Implementation details:
+// A chained hash table is used to find matches. `drain` always preserves `flate.history_len`
+// bytes to use as a history and avoids tokenizing the final bytes since they can be part of
+// a longer match with unwritten bytes (unless it is a `flush`). The minimum match searched
+// for is of length `seq_bytes`. If a match is made, a longer match is also checked for at
+// the next byte (lazy matching) if the last match does not meet the `Options.lazy` threshold.
+//
+// Up to `block_tokens` tokens are accumulated in `buffered_tokens` and are output in
+// `writeBlock`, which determines the optimal block type from the gathered frequencies.
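+//
+// A minimal usage sketch (illustrative; `backing_writer`, `buffer`, and `bytes` are
+// placeholders for caller-provided values, with `buffer` at least
+// `flate.max_window_len` bytes):
+//
+//     var c = try Compress.init(backing_writer, buffer, .raw, .default);
+//     try c.writer.writeAll(bytes);
+//     try c.writer.flush(); // finishes the stream; `c.writer` becomes `.failing`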
const builtin = @import("builtin");
const std = @import("std");
-const assert = std.debug.assert;
-const testing = std.testing;
-const expect = testing.expect;
const mem = std.mem;
const math = std.math;
-const Writer = std.Io.Writer;
+const assert = std.debug.assert;
+const Io = std.Io;
+const Writer = Io.Writer;
const Compress = @This();
-const Token = @import("Token.zig");
-const BlockWriter = @import("BlockWriter.zig");
+const token = @import("token.zig");
const flate = @import("../flate.zig");
-const Container = flate.Container;
-const Lookup = @import("Lookup.zig");
-const HuffmanEncoder = flate.HuffmanEncoder;
-const LiteralNode = HuffmanEncoder.LiteralNode;
-
-lookup: Lookup = .{},
-tokens: Tokens = .{},
-block_writer: BlockWriter,
-level: LevelArgs,
-hasher: Container.Hasher,
-writer: Writer,
-state: State,
-// Match and literal at the previous position.
-// Used for lazy match finding in processWindow.
-prev_match: ?Token = null,
-prev_literal: ?u8 = null,
+/// Until #104 is implemented, a ?u15 takes 4 bytes, which is unacceptable
+/// as it doubles the size of this already massive structure.
+///
+/// Also, there are no `to` / `from` methods because LLVM 21 does not
+/// optimize away the conversion from and to `?u15`.
+const PackedOptionalU15 = packed struct(u16) {
+ value: u15,
+ is_null: bool,
-pub const State = enum { header, middle, ended };
+ pub fn int(p: PackedOptionalU15) u16 {
+ return @bitCast(p);
+ }
-/// Trades between speed and compression size.
-/// Starts with level 4: in [zlib](https://github.com/madler/zlib/blob/abd3d1a28930f89375d4b41408b39f6c1be157b2/deflate.c#L115C1-L117C43)
-/// levels 1-3 are using different algorithm to perform faster but with less
-/// compression. That is not implemented here.
-pub const Level = enum(u4) {
- level_4 = 4,
- level_5 = 5,
- level_6 = 6,
- level_7 = 7,
- level_8 = 8,
- level_9 = 9,
-
- fast = 0xb,
- default = 0xc,
- best = 0xd,
+ pub const null_bit: PackedOptionalU15 = .{ .value = 0, .is_null = true };
};
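+
+// Illustrative check: the optional fits in 16 bits with the null flag in the high
+// bit (`matchAndAddHash` relies on that bit equaling `flate.history_len`).
+test PackedOptionalU15 {
+    try std.testing.expectEqual(0x8000, PackedOptionalU15.null_bit.int());
+    const five: PackedOptionalU15 = .{ .value = 5, .is_null = false };
+    try std.testing.expectEqual(5, five.int());
+}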
-/// Number of tokens to accumulate in deflate before starting block encoding.
-///
-/// In zlib this depends on memlevel: 6 + memlevel, where default memlevel is
-/// 8 and max 9 that gives 14 or 15 bits.
-pub const n_tokens = 1 << 15;
-
-/// Algorithm knobs for each level.
-const LevelArgs = struct {
- good: u16, // Do less lookups if we already have match of this length.
- nice: u16, // Stop looking for better match if we found match with at least this length.
- lazy: u16, // Don't do lazy match find if got match with at least this length.
- chain: u16, // How many lookups for previous match to perform.
-
- pub fn get(level: Level) LevelArgs {
- return switch (level) {
- .fast, .level_4 => .{ .good = 4, .lazy = 4, .nice = 16, .chain = 16 },
- .level_5 => .{ .good = 8, .lazy = 16, .nice = 32, .chain = 32 },
- .default, .level_6 => .{ .good = 8, .lazy = 16, .nice = 128, .chain = 128 },
- .level_7 => .{ .good = 8, .lazy = 32, .nice = 128, .chain = 256 },
- .level_8 => .{ .good = 32, .lazy = 128, .nice = 258, .chain = 1024 },
- .best, .level_9 => .{ .good = 32, .lazy = 258, .nice = 258, .chain = 4096 },
+/// After `flush` is called, all vtable calls will result in `error.WriteFailed`.
+writer: Writer,
+has_history: bool,
+bit_writer: BitWriter,
+buffered_tokens: struct {
+ /// List of `TokenBufferEntryHeader`s and their trailing data.
+ list: [@as(usize, block_tokens) * 3]u8,
+ pos: u32,
+ n: u16,
+ lit_freqs: [286]u16,
+ dist_freqs: [30]u16,
+
+ pub const empty: @This() = .{
+ .list = undefined,
+ .pos = 0,
+ .n = 0,
+ .lit_freqs = @splat(0),
+ .dist_freqs = @splat(0),
+ };
+},
+lookup: struct {
+    /// Indexes are the hashes of `seq_bytes`-long byte sequences.
+    ///
+    /// Values are the positions in `chain` of the previous sequence with the same hash.
+ head: [1 << lookup_hash_bits]PackedOptionalU15,
+ /// Values are the non-zero number of bytes backwards in the history with the same hash.
+ ///
+ /// The relationship of chain indexes and bytes relative to the latest history byte is
+ /// `chain_pos -% chain_index = history_index`.
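+    /// For example, with `chain_pos` == 100, chain index 97 refers to the byte three
+    /// positions older than the newest history byte (history index 3).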
+ chain: [32768]PackedOptionalU15,
+ /// The index in `chain` which is of the newest byte of the history.
+ chain_pos: u15,
+},
+container: flate.Container,
+hasher: flate.Container.Hasher,
+opts: Options,
+
+const BitWriter = struct {
+ output: *Writer,
+ buffered: u7,
+ buffered_n: u3,
+
+ pub fn init(w: *Writer) BitWriter {
+ return .{
+ .output = w,
+ .buffered = 0,
+ .buffered_n = 0,
};
}
+
+ /// Asserts `bits` is zero-extended
+ pub fn write(b: *BitWriter, bits: u56, n: u6) Writer.Error!void {
+ assert(@as(u8, b.buffered) >> b.buffered_n == 0);
+ assert(@as(u57, bits) >> n == 0); // n may be 56 so u57 is needed
+ const combined = @shlExact(@as(u64, bits), b.buffered_n) | b.buffered;
+ const combined_bits = @as(u6, b.buffered_n) + n;
+
+ const out = try b.output.writableSliceGreedy(8);
+ mem.writeInt(u64, out[0..8], combined, .little);
+ b.output.advance(combined_bits / 8);
+
+ b.buffered_n = @truncate(combined_bits);
+ b.buffered = @intCast(combined >> (combined_bits - b.buffered_n));
+ }
+
+    /// Asserts one byte can be written to `b.output` without rebasing.
+ pub fn byteAlign(b: *BitWriter) void {
+ b.output.unusedCapacitySlice()[0] = b.buffered;
+ b.output.advance(@intFromBool(b.buffered_n != 0));
+ b.buffered = 0;
+ b.buffered_n = 0;
+ }
+
+ pub fn writeClen(
+ b: *BitWriter,
+ hclen: u4,
+ clen_values: []u8,
+ clen_extra: []u8,
+ clen_codes: [19]u16,
+ clen_bits: [19]u4,
+ ) Writer.Error!void {
+        // Write the first four clen entries separately since they are always present,
+ // and writing them all at once takes too many bits.
+ try b.write(clen_bits[token.codegen_order[0]] |
+ @shlExact(@as(u6, clen_bits[token.codegen_order[1]]), 3) |
+ @shlExact(@as(u9, clen_bits[token.codegen_order[2]]), 6) |
+ @shlExact(@as(u12, clen_bits[token.codegen_order[3]]), 9), 12);
+
+ var i = hclen;
+ var clen_bits_table: u45 = 0;
+ while (i != 0) {
+ i -= 1;
+ clen_bits_table <<= 3;
+ clen_bits_table |= clen_bits[token.codegen_order[4..][i]];
+ }
+ try b.write(clen_bits_table, @as(u6, hclen) * 3);
+
+ for (clen_values, clen_extra) |value, extra| {
+ try b.write(
+ clen_codes[value] | @shlExact(@as(u16, extra), clen_bits[value]),
+ clen_bits[value] + @as(u3, switch (value) {
+ 0...15 => 0,
+ 16 => 2,
+ 17 => 3,
+ 18 => 7,
+ else => unreachable,
+ }),
+ );
+ }
+ }
+};
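+
+// Illustrative check: bits accumulate LSB-first and whole bytes are flushed to the
+// backing writer. Writing 0b101 (3 bits) then 0b0000_0110 (8 bits) emits the byte
+// 0b0011_0101 and leaves three (zero) bits buffered.
+test BitWriter {
+    var buf: [16]u8 = undefined;
+    var out: Writer = .fixed(&buf);
+    var bw: BitWriter = .init(&out);
+    try bw.write(0b101, 3);
+    try bw.write(0b0000_0110, 8);
+    try std.testing.expectEqualSlices(u8, &.{0b0011_0101}, out.buffered());
+    try std.testing.expectEqual(3, bw.buffered_n);
+    try std.testing.expectEqual(0, bw.buffered);
+}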
+
+/// Number of tokens to accumulate before outputting as a block.
+/// The maximum value is `math.maxInt(u16) - 1` since one token is reserved for end-of-block.
+const block_tokens: u16 = 1 << 15;
+const lookup_hash_bits = 15;
+const Hash = u16; // `u[lookup_hash_bits]` is not used due to worse optimization (with LLVM 21)
+const seq_bytes = 3; // not intended to be changed
+const Seq = std.meta.Int(.unsigned, seq_bytes * 8);
+
+const TokenBufferEntryHeader = packed struct(u16) {
+ kind: enum(u1) {
+ /// Followed by non-zero `data` byte literals.
+ bytes,
+ /// Followed by the length as a byte
+ match,
+ },
+ data: u15,
+};
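+
+// Buffer layout sketch: a `.bytes` entry is the two-byte header followed by `data`
+// literal bytes; a `.match` entry is the two-byte header carrying the distance in
+// `data`, followed by one byte holding the match length minus 3.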
+
+const BlockHeader = packed struct(u3) {
+ final: bool,
+ kind: enum(u2) { stored, fixed, dynamic, _ },
+
+ pub fn int(h: BlockHeader) u3 {
+ return @bitCast(h);
+ }
+
+ pub const Dynamic = packed struct(u17) {
+ regular: BlockHeader,
+ hlit: u5,
+ hdist: u5,
+ hclen: u4,
+
+ pub fn int(h: Dynamic) u17 {
+ return @bitCast(h);
+ }
+ };
};
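+
+// Illustrative check: the packed layout matches the RFC 1951 block header, BFINAL
+// in the lowest bit followed by the two BTYPE bits (the bit writer emits LSB-first).
+test BlockHeader {
+    try std.testing.expectEqual(0b011, BlockHeader.int(.{ .final = true, .kind = .fixed }));
+    try std.testing.expectEqual(0b100, BlockHeader.int(.{ .final = false, .kind = .dynamic }));
+}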
+fn outputMatch(c: *Compress, dist: u15, len: u8) Writer.Error!void {
+ // This must come first. Instead of ensuring a full block is never left buffered,
+    // draining it is deferred to allow the end of stream to be indicated.
+ if (c.buffered_tokens.n == block_tokens) {
+        @branchHint(.unlikely); // LLVM 21 otherwise treats this branch as the more likely one
+ try c.writeBlock(false);
+ }
+ const header: TokenBufferEntryHeader = .{ .kind = .match, .data = dist };
+ c.buffered_tokens.list[c.buffered_tokens.pos..][0..2].* = @bitCast(header);
+ c.buffered_tokens.list[c.buffered_tokens.pos + 2] = len;
+ c.buffered_tokens.pos += 3;
+ c.buffered_tokens.n += 1;
+
+ c.buffered_tokens.lit_freqs[@as(usize, 257) + token.LenCode.fromVal(len).toInt()] += 1;
+ c.buffered_tokens.dist_freqs[token.DistCode.fromVal(dist).toInt()] += 1;
+}
+
+fn outputBytes(c: *Compress, bytes: []const u8) Writer.Error!void {
+ var remaining = bytes;
+ while (remaining.len != 0) {
+ if (c.buffered_tokens.n == block_tokens) {
+            @branchHint(.unlikely); // LLVM 21 otherwise treats this branch as the more likely one
+ try c.writeBlock(false);
+ }
+
+ const n = @min(remaining.len, block_tokens - c.buffered_tokens.n, math.maxInt(u15));
+ assert(n != 0);
+ const header: TokenBufferEntryHeader = .{ .kind = .bytes, .data = n };
+ c.buffered_tokens.list[c.buffered_tokens.pos..][0..2].* = @bitCast(header);
+ @memcpy(c.buffered_tokens.list[c.buffered_tokens.pos + 2 ..][0..n], remaining[0..n]);
+ c.buffered_tokens.pos += @as(u32, 2) + n;
+ c.buffered_tokens.n += n;
+
+ for (remaining[0..n]) |b| {
+ c.buffered_tokens.lit_freqs[b] += 1;
+ }
+ remaining = remaining[n..];
+ }
+}
+
+fn hash(x: u32) Hash {
+ return @intCast((x *% 0x9E3779B1) >> (32 - lookup_hash_bits));
+}
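+// The multiplier above is a 32-bit constant close to 2^32 divided by the golden
+// ratio (Fibonacci hashing), so keeping the top `lookup_hash_bits` bits of the
+// product spreads nearby `Seq` values across the whole table.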
+
+/// Trades between speed and compression size.
+///
+/// Default parameters are [taken from zlib]
+/// (https://github.com/madler/zlib/blob/v1.3.1/deflate.c#L112)
pub const Options = struct {
- level: Level = .default,
- container: Container = .raw,
+ /// Perform less lookups when a match of at least this length has been found.
+ good: u16,
+ /// Stop when a match of at least this length has been found.
+ nice: u16,
+ /// Don't attempt a lazy match find when a match of at least this length has been found.
+ lazy: u16,
+ /// Check this many previous locations with the same hash for longer matches.
+ chain: u16,
+
+ // zig fmt: off
+ pub const level_1: Options = .{ .good = 4, .nice = 8, .lazy = 0, .chain = 4 };
+ pub const level_2: Options = .{ .good = 4, .nice = 16, .lazy = 0, .chain = 8 };
+ pub const level_3: Options = .{ .good = 4, .nice = 32, .lazy = 0, .chain = 32 };
+ pub const level_4: Options = .{ .good = 4, .nice = 16, .lazy = 4, .chain = 16 };
+ pub const level_5: Options = .{ .good = 8, .nice = 32, .lazy = 16, .chain = 32 };
+ pub const level_6: Options = .{ .good = 8, .nice = 128, .lazy = 16, .chain = 128 };
+ pub const level_7: Options = .{ .good = 8, .nice = 128, .lazy = 32, .chain = 256 };
+ pub const level_8: Options = .{ .good = 32, .nice = 258, .lazy = 128, .chain = 1024 };
+ pub const level_9: Options = .{ .good = 32, .nice = 258, .lazy = 258, .chain = 4096 };
+ // zig fmt: on
+ pub const fastest = level_1;
+ pub const default = level_6;
+ pub const best = level_9;
};
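+
+// Illustrative: a custom trade-off is also possible as long as it satisfies the
+// assertions in `init` below (non-zero `chain`, `good` and `nice` within [3, 258],
+// `lazy` <= `nice`), e.g. `.{ .good = 8, .nice = 64, .lazy = 16, .chain = 64 }`.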
-pub fn init(output: *Writer, buffer: []u8, options: Options) Compress {
+/// It is asserted `buffer` is at least `flate.max_window_len` bytes.
+/// It is asserted `output` has a capacity of at least 8 bytes.
+pub fn init(
+ output: *Writer,
+ buffer: []u8,
+ container: flate.Container,
+ opts: Options,
+) Writer.Error!Compress {
+ assert(output.buffer.len > 8);
+ assert(buffer.len >= flate.max_window_len);
+
+ // note that disallowing some of these simplifies matching logic
+    assert(opts.chain != 0); // use `Huffman` instead; disallowing this simplifies matching
+ assert(opts.good >= 3 and opts.nice >= 3); // a match will (usually) not be found
+ assert(opts.good <= 258 and opts.nice <= 258); // a longer match will not be found
+ assert(opts.lazy <= opts.nice); // a longer match will (usually) not be found
+ if (opts.good <= opts.lazy) assert(opts.chain >= 1 << 2); // chain can be reduced to zero
+
+ try output.writeAll(container.header());
return .{
- .block_writer = .init(output),
- .level = .get(options.level),
- .hasher = .init(options.container),
- .state = .header,
.writer = .{
.buffer = buffer,
- .vtable = &.{ .drain = drain },
+ .vtable = &.{
+ .drain = drain,
+ .flush = flush,
+ .rebase = rebase,
+ },
+ },
+ .has_history = false,
+ .bit_writer = .init(output),
+ .buffered_tokens = .empty,
+ .lookup = .{
+            // init `value` to max so each entry is an all-0xff byte pattern
+ .head = @splat(.{ .value = math.maxInt(u15), .is_null = true }),
+ .chain = undefined,
+ .chain_pos = math.maxInt(u15),
},
+ .container = container,
+ .opts = opts,
+ .hasher = .init(container),
};
}
-// Tokens store
-const Tokens = struct {
- list: [n_tokens]Token = undefined,
- pos: usize = 0,
+fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
+ errdefer w.* = .failing;
+    // There may not have been enough space in the buffer and the write was sent directly here.
+ // However, it is required that all data goes through the buffer to keep a history.
+ //
+    // Additionally, keeping the buffer full guarantees that a full history is
+    // available afterwards.
+ const data_n = w.buffer.len - w.end;
+ _ = w.fixedDrain(data, splat) catch {};
+ assert(w.end == w.buffer.len);
+ try rebaseInner(w, 0, 1, false);
+ return data_n;
+}
+
+fn flush(w: *Writer) Writer.Error!void {
+ defer w.* = .failing;
+ const c: *Compress = @fieldParentPtr("writer", w);
+ try rebaseInner(w, 0, w.buffer.len - flate.history_len, true);
+ try c.bit_writer.output.rebase(0, 1);
+ c.bit_writer.byteAlign();
+ try c.hasher.writeFooter(c.bit_writer.output);
+}
+
+fn rebase(w: *Writer, preserve: usize, capacity: usize) Writer.Error!void {
+ return rebaseInner(w, preserve, capacity, false);
+}
+
+pub const rebase_min_preserve = flate.history_len;
+pub const rebase_reserved_capacity = (token.max_length + 1) + seq_bytes;
+
+fn rebaseInner(w: *Writer, preserve: usize, capacity: usize, eos: bool) Writer.Error!void {
+ if (!eos) {
+ assert(@max(preserve, rebase_min_preserve) + (capacity + rebase_reserved_capacity) <= w.buffer.len);
+        assert(w.end >= flate.history_len + rebase_reserved_capacity); // If this fails, the
+        // assert above should have failed first, since rebase is only called when `capacity`
+        // is not available. This assertion is important because a full history is required
+        // at the end.
+ } else {
+ assert(preserve == 0 and capacity == w.buffer.len - flate.history_len);
+ }
+
+ const c: *Compress = @fieldParentPtr("writer", w);
+ const buffered = w.buffered();
+
+ const start = @as(usize, flate.history_len) * @intFromBool(c.has_history);
+ const lit_end: usize = if (!eos)
+ buffered.len - rebase_reserved_capacity - (preserve -| flate.history_len)
+ else
+ buffered.len -| (seq_bytes - 1);
+
+ var i = start;
+ var last_unmatched = i;
+ // Read from `w.buffer` instead of `buffered` since the latter may not
+ // have enough bytes. If this is the case, this variable is not used.
+ var seq: Seq = mem.readInt(
+ std.meta.Int(.unsigned, (seq_bytes - 1) * 8),
+ w.buffer[i..][0 .. seq_bytes - 1],
+ .big,
+ );
+ if (buffered[i..].len < seq_bytes - 1) {
+ @branchHint(.unlikely);
+ assert(eos);
+ seq = undefined;
+ assert(i >= lit_end);
+ }
+
+ while (i < lit_end) {
+ var match_start = i;
+ seq <<= 8;
+ seq |= buffered[i + (seq_bytes - 1)];
+ var match = c.matchAndAddHash(i, hash(seq), token.min_length - 1, c.opts.chain, c.opts.good);
+ i += 1;
+ if (match.len < token.min_length) continue;
+
+ var match_unadded = match.len - 1;
+ lazy: {
+ if (match.len >= c.opts.lazy) break :lazy;
+ if (match.len >= c.writer.buffered()[i..].len) {
+ @branchHint(.unlikely); // Only end of stream
+ break :lazy;
+ }
- fn add(self: *Tokens, t: Token) void {
- self.list[self.pos] = t;
- self.pos += 1;
+ var chain = c.opts.chain;
+ var good = c.opts.good;
+ if (match.len >= good) {
+ chain >>= 2;
+ good = math.maxInt(u8); // Reduce only once
+ }
+
+ seq <<= 8;
+ seq |= buffered[i + (seq_bytes - 1)];
+ const lazy = c.matchAndAddHash(i, hash(seq), match.len, chain, good);
+ match_unadded -= 1;
+ i += 1;
+
+ if (lazy.len > match.len) {
+ match_start += 1;
+ match = lazy;
+ match_unadded = match.len - 1;
+ }
+ }
+
+ assert(i + match_unadded == match_start + match.len);
+ assert(mem.eql(
+ u8,
+ buffered[match_start..][0..match.len],
+ buffered[match_start - 1 - match.dist ..][0..match.len],
+ )); // This assert also seems to help codegen.
+
+ try c.outputBytes(buffered[last_unmatched..match_start]);
+ try c.outputMatch(@intCast(match.dist), @intCast(match.len - 3));
+
+ last_unmatched = match_start + match.len;
+ if (last_unmatched + seq_bytes >= w.end) {
+ @branchHint(.unlikely);
+ assert(eos);
+ i = undefined;
+ break;
+ }
+
+ while (true) {
+ seq <<= 8;
+ seq |= buffered[i + (seq_bytes - 1)];
+ _ = c.addHash(i, hash(seq));
+ i += 1;
+
+ match_unadded -= 1;
+ if (match_unadded == 0) break;
+ }
+ assert(i == match_start + match.len);
}
- fn full(self: *Tokens) bool {
- return self.pos == self.list.len;
+ if (eos) {
+ i = undefined; // (from match hashing logic)
+ try c.outputBytes(buffered[last_unmatched..]);
+ c.hasher.update(buffered[start..]);
+ try c.writeBlock(true);
+ return;
}
- fn reset(self: *Tokens) void {
- self.pos = 0;
+ try c.outputBytes(buffered[last_unmatched..i]);
+ c.hasher.update(buffered[start..i]);
+
+ const preserved = buffered[i - flate.history_len ..];
+ assert(preserved.len > @max(rebase_min_preserve, preserve));
+ @memmove(w.buffer[0..preserved.len], preserved);
+ w.end = preserved.len;
+ c.has_history = true;
+}
+
+fn addHash(c: *Compress, i: usize, h: Hash) void {
+ assert(h == hash(mem.readInt(Seq, c.writer.buffer[i..][0..seq_bytes], .big)));
+
+ const l = &c.lookup;
+ l.chain_pos +%= 1;
+
+    // Equivalent to the below, however LLVM 21 does not optimize `@subWithOverflow` well at all.
+ // const replaced_i, const no_replace = @subWithOverflow(i, flate.history_len);
+ // if (no_replace == 0) {
+ if (i >= flate.history_len) {
+ @branchHint(.likely);
+ const replaced_i = i - flate.history_len;
+ // The following is the same as the below except uses a 32-bit load to help optimizations
+ // const replaced_seq = mem.readInt(Seq, c.writer.buffer[replaced_i..][0..seq_bytes], .big);
+ comptime assert(@sizeOf(Seq) <= @sizeOf(u32));
+ const replaced_u32 = mem.readInt(u32, c.writer.buffered()[replaced_i..][0..4], .big);
+ const replaced_seq: Seq = @intCast(replaced_u32 >> (32 - @bitSizeOf(Seq)));
+
+ const replaced_h = hash(replaced_seq);
+        // The following is equivalent to the commented-out code below, which LLVM 21 doesn't optimize well.
+ // l.head[replaced_h].is_null = l.head[replaced_h].is_null or
+ // l.head[replaced_h].int() == l.chain_pos;
+ const empty_head = l.head[replaced_h].int() == l.chain_pos;
+ const null_flag = PackedOptionalU15.int(.{ .is_null = empty_head, .value = 0 });
+ l.head[replaced_h] = @bitCast(l.head[replaced_h].int() | null_flag);
}
- fn tokens(self: *Tokens) []const Token {
- return self.list[0..self.pos];
+ const prev_chain_index = l.head[h];
+ l.chain[l.chain_pos] = @bitCast((l.chain_pos -% prev_chain_index.value) |
+ (prev_chain_index.int() & PackedOptionalU15.null_bit.int())); // Preserves null
+ l.head[h] = .{ .value = l.chain_pos, .is_null = false };
+}
+
+/// If the match is shorter, the returned value can be any value `<= old`.
+fn betterMatchLen(old: u16, prev: []const u8, bytes: []const u8) u16 {
+ assert(old < @min(bytes.len, token.max_length));
+ assert(prev.len >= bytes.len);
+ assert(bytes.len >= token.min_length);
+
+ var i: u16 = 0;
+ const Block = std.meta.Int(.unsigned, @min(math.divCeil(
+ comptime_int,
+ math.ceilPowerOfTwoAssert(usize, @bitSizeOf(usize)),
+ 8,
+ ) catch unreachable, 256) * 8);
+
+ if (bytes.len < token.max_length) {
+ @branchHint(.unlikely); // Only end of stream
+
+ while (bytes[i..].len >= @sizeOf(Block)) {
+ const a = mem.readInt(Block, prev[i..][0..@sizeOf(Block)], .little);
+ const b = mem.readInt(Block, bytes[i..][0..@sizeOf(Block)], .little);
+ const diff = a ^ b;
+ if (diff != 0) {
+ @branchHint(.likely);
+ i += @ctz(diff) / 8;
+ return i;
+ }
+ i += @sizeOf(Block);
+ }
+
+ while (i != bytes.len and prev[i] == bytes[i]) {
+ i += 1;
+ }
+ assert(i < token.max_length);
+ return i;
}
-};
-fn drain(me: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
- _ = data;
- _ = splat;
- const c: *Compress = @fieldParentPtr("writer", me);
- const out = c.block_writer.output;
- switch (c.state) {
- .header => {
- c.state = .middle;
- const header = c.hasher.container().header();
- try out.writeAll(header);
- return header.len;
- },
- .middle => {},
- .ended => unreachable,
+ if (old >= @sizeOf(Block)) {
+ // Check that a longer end is present, otherwise the match is always worse
+ const a = mem.readInt(Block, prev[old + 1 - @sizeOf(Block) ..][0..@sizeOf(Block)], .little);
+ const b = mem.readInt(Block, bytes[old + 1 - @sizeOf(Block) ..][0..@sizeOf(Block)], .little);
+ if (a != b) return i;
+ }
+
+ while (true) {
+ const a = mem.readInt(Block, prev[i..][0..@sizeOf(Block)], .little);
+ const b = mem.readInt(Block, bytes[i..][0..@sizeOf(Block)], .little);
+ const diff = a ^ b;
+ if (diff != 0) {
+ i += @ctz(diff) / 8;
+ return i;
+ }
+ i += @sizeOf(Block);
+ if (i == 256) break;
+ }
+
+ const a = mem.readInt(u16, prev[i..][0..2], .little);
+ const b = mem.readInt(u16, bytes[i..][0..2], .little);
+ const diff = a ^ b;
+ i += @ctz(diff) / 8;
+ assert(i <= token.max_length);
+ return i;
+}
+
+test betterMatchLen {
+ try std.testing.fuzz({}, testFuzzedMatchLen, .{});
+}
+
+fn testFuzzedMatchLen(_: void, input: []const u8) !void {
+ @disableInstrumentation();
+ var r: Io.Reader = .fixed(input);
+ var buf: [1024]u8 = undefined;
+ var w: Writer = .fixed(&buf);
+ var old = r.takeLeb128(u9) catch 0;
+ var bytes_off = @max(1, r.takeLeb128(u10) catch 258);
+ const prev_back = @max(1, r.takeLeb128(u10) catch 258);
+
+ while (r.takeByte()) |byte| {
+ const op: packed struct(u8) {
+ kind: enum(u2) { splat, copy, insert_imm, insert },
+ imm: u6,
+
+ pub fn immOrByte(op_s: @This(), r_s: *Io.Reader) usize {
+ return if (op_s.imm == 0) op_s.imm else @as(usize, r_s.takeByte() catch 0) + 64;
+ }
+ } = @bitCast(byte);
+ (switch (op.kind) {
+ .splat => w.splatByteAll(r.takeByte() catch 0, op.immOrByte(&r)),
+ .copy => write: {
+ const start = w.buffered().len -| op.immOrByte(&r);
+ const len = @min(w.buffered().len - start, r.takeByte() catch 3);
+ break :write w.writeAll(w.buffered()[start..][0..len]);
+ },
+ .insert_imm => w.writeByte(op.imm),
+ .insert => w.writeAll(r.take(
+ @min(r.bufferedLen(), @as(usize, op.imm) + 1),
+ ) catch unreachable),
+ }) catch break;
+ } else |_| {}
+
+ w.splatByteAll(0, (1 + 3) -| w.buffered().len) catch unreachable;
+ bytes_off = @min(bytes_off, @as(u10, @intCast(w.buffered().len - 3)));
+ const prev_off = bytes_off -| prev_back;
+ assert(prev_off < bytes_off);
+ const prev = w.buffered()[prev_off..];
+ const bytes = w.buffered()[bytes_off..];
+ old = @min(old, bytes.len - 1, token.max_length - 1);
+
+    const diff_index = mem.indexOfDiff(u8, prev, bytes).?; // unwrap since the lengths are not the same
+ const expected_len = @min(diff_index, 258);
+ errdefer std.debug.print(
+ \\prev : '{any}'
+ \\bytes: '{any}'
+ \\old : {}
+ \\expected: {?}
+ \\actual : {}
+ ++ "\n", .{
+ prev, bytes, old,
+ if (old < expected_len) expected_len else null, betterMatchLen(old, prev, bytes),
+ });
+ if (old < expected_len) {
+ try std.testing.expectEqual(expected_len, betterMatchLen(old, prev, bytes));
+ } else {
+ try std.testing.expect(betterMatchLen(old, prev, bytes) <= old);
+ }
+}
+
+fn matchAndAddHash(c: *Compress, i: usize, h: Hash, gt: u16, max_chain: u16, good_: u16) struct {
+ dist: u16,
+ len: u16,
+} {
+ const l = &c.lookup;
+ const buffered = c.writer.buffered();
+
+ var chain_limit = max_chain;
+ var best_dist: u16 = undefined;
+ var best_len = gt;
+ const nice = @min(c.opts.nice, buffered[i..].len);
+ var good = good_;
+
+ search: {
+ if (l.head[h].is_null) break :search;
+ // Actually a u15, but LLVM 21 does not optimize that as well (it truncates it each use).
+ var dist: u16 = l.chain_pos -% l.head[h].value;
+ while (true) {
+ chain_limit -= 1;
+
+ const match_len = betterMatchLen(best_len, buffered[i - 1 - dist ..], buffered[i..]);
+ if (match_len > best_len) {
+ best_dist = dist;
+ best_len = match_len;
+ if (best_len >= nice) break;
+ if (best_len >= good) {
+ chain_limit >>= 2;
+ good = math.maxInt(u8); // Reduce only once
+ }
+ }
+
+ if (chain_limit == 0) break;
+ const next_chain_index = l.chain_pos -% @as(u15, @intCast(dist));
+            // Equivalent to the below; however, LLVM 21 optimizes the below worse.
+ // if (l.chain[next_chain_index].is_null) break;
+ // dist, const out_of_window = @addWithOverflow(dist, l.chain[next_chain_index].value);
+ // if (out_of_window == 1) break;
+ dist +%= l.chain[next_chain_index].int(); // wrapping for potential null bit
+ comptime assert(flate.history_len == PackedOptionalU15.int(.null_bit));
+ // Also, doing >= flate.history_len gives worse codegen with LLVM 21.
+ if ((dist | l.chain[next_chain_index].int()) & flate.history_len != 0) break;
+ }
+ }
+
+ c.addHash(i, h);
+ return .{ .dist = best_dist, .len = best_len };
+}
+
+fn clenHlen(freqs: [19]u16) u4 {
+ // Note that the first four codes (16, 17, 18, and 0) are always present.
+ if (builtin.mode != .ReleaseSmall and (std.simd.suggestVectorLength(u16) orelse 1) >= 8) {
+ const V = @Vector(16, u16);
+ const hlen_mul: V = comptime m: {
+ var hlen_mul: [16]u16 = undefined;
+ for (token.codegen_order[3..], 0..) |i, hlen| {
+ hlen_mul[i] = hlen;
+ }
+ break :m hlen_mul;
+ };
+ const encoded = freqs[0..16].* != @as(V, @splat(0));
+ return @intCast(@reduce(.Max, @intFromBool(encoded) * hlen_mul));
+ } else {
+ var max: u4 = 0;
+ for (token.codegen_order[4..], 1..) |i, len| {
+ max = if (freqs[i] == 0) max else @intCast(len);
+ }
+ return max;
+ }
+}
+
+test clenHlen {
+ var freqs: [19]u16 = @splat(0);
+ try std.testing.expectEqual(0, clenHlen(freqs));
+ for (token.codegen_order, 1..) |i, len| {
+ freqs[i] = 1;
+ try std.testing.expectEqual(len -| 4, clenHlen(freqs));
+ freqs[i] = 0;
+ }
+}
+
+/// Returns the number of values followed by the bitsize of the extra bits.
+fn buildClen(
+ dyn_bits: []const u4,
+ out_values: []u8,
+ out_extra: []u8,
+ out_freqs: *[19]u16,
+) struct { u16, u16 } {
+ assert(dyn_bits.len <= out_values.len);
+ assert(out_values.len == out_extra.len);
+
+ var len: u16 = 0;
+ var extra_bitsize: u16 = 0;
+
+ var remaining_bits = dyn_bits;
+ var prev: u4 = 0;
+ while (true) {
+ const b = remaining_bits[0];
+ const n_max = @min(@as(u8, if (b != 0)
+ if (b != prev) 1 else 6
+ else
+ 138), remaining_bits.len);
+ prev = b;
+
+ var n: u8 = 0;
+ while (true) {
+ remaining_bits = remaining_bits[1..];
+ n += 1;
+ if (n == n_max or remaining_bits[0] != b) break;
+ }
+ const code, const extra, const xsize = switch (n) {
+ 0 => unreachable,
+ 1...2 => .{ b, 0, 0 },
+ 3...10 => .{
+ @as(u8, 16) + @intFromBool(b == 0),
+ n - 3,
+ @as(u8, 2) + @intFromBool(b == 0),
+ },
+ 11...138 => .{ 18, n - 11, 7 },
+ else => unreachable,
+ };
+ while (true) {
+ out_values[len] = code;
+ out_extra[len] = extra;
+ out_freqs[code] += 1;
+ extra_bitsize += xsize;
+ len += 1;
+ if (n != 2) {
+ @branchHint(.likely);
+ break;
+ }
+            // Code needs to be output once more
+ n = 1;
+ }
+ if (remaining_bits.len == 0) break;
+ }
+
+ return .{ len, extra_bitsize };
+}
+
+test buildClen {
+ var out_values: [288 + 30]u8 = undefined;
+ var out_extra: [288 + 30]u8 = undefined;
+ var out_freqs: [19]u16 = @splat(0);
+ const len, const extra_bitsize = buildClen(&([_]u4{
+ 1, // A
+ 2, 2, // B
+ 3, 3, 3, // C
+ 4, 4, 4, 4, // D
+ 5, // E
+ 5, 5, 5, 5, 5, 5, //
+ 5, 5, 5, 5, 5, 5,
+ 5, 5,
+ 0, 1, // F
+ 0, 0, 1, // G
+ } ++ @as([138 + 10]u4, @splat(0)) // H
+ ), &out_values, &out_extra, &out_freqs);
+ try std.testing.expectEqualSlices(u8, &.{
+ 1, // A
+ 2, 2, // B
+ 3, 3, 3, // C
+ 4, 16, // D
+ 5, 16, 16, 5, 5, // E
+ 0, 1, // F
+ 0, 0, 1, // G
+ 18, 17, // H
+ }, out_values[0..len]);
+ try std.testing.expectEqualSlices(u8, &.{
+ 0, // A
+ 0, 0, // B
+ 0, 0, 0, // C
+ 0, (0), // D
+ 0, (3), (3), 0, 0, // E
+ 0, 0, // F
+ 0, 0, 0, // G
+ (127), (7), // H
+ }, out_extra[0..len]);
+ try std.testing.expectEqual(2 + 2 + 2 + 7 + 3, extra_bitsize);
+ try std.testing.expectEqualSlices(u16, &.{
+ 3, 3, 2, 3, 1, 3, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 3, 1, 1,
+ }, &out_freqs);
+}
+
+fn writeBlock(c: *Compress, eos: bool) Writer.Error!void {
+ const toks = &c.buffered_tokens;
+ if (!eos) assert(toks.n == block_tokens);
+ assert(toks.lit_freqs[256] == 0);
+ toks.lit_freqs[256] = 1;
+
+ var dyn_codes_buf: [286 + 30]u16 = undefined;
+ var dyn_bits_buf: [286 + 30]u4 = @splat(0);
+
+ const dyn_lit_codes_bitsize, const dyn_last_lit = huffman.build(
+ &toks.lit_freqs,
+ dyn_codes_buf[0..286],
+ dyn_bits_buf[0..286],
+ 15,
+ true,
+ );
+ const dyn_lit_len = @max(257, dyn_last_lit + 1);
+
+ const dyn_dist_codes_bitsize, const dyn_last_dist = huffman.build(
+ &toks.dist_freqs,
+ dyn_codes_buf[dyn_lit_len..][0..30],
+ dyn_bits_buf[dyn_lit_len..][0..30],
+ 15,
+ true,
+ );
+ const dyn_dist_len = @max(1, dyn_last_dist + 1);
+
+ var clen_values: [288 + 30]u8 = undefined;
+ var clen_extra: [288 + 30]u8 = undefined;
+ var clen_freqs: [19]u16 = @splat(0);
+ const clen_len, const clen_extra_bitsize = buildClen(
+ dyn_bits_buf[0 .. dyn_lit_len + dyn_dist_len],
+ &clen_values,
+ &clen_extra,
+ &clen_freqs,
+ );
+
+ var clen_codes: [19]u16 = undefined;
+ var clen_bits: [19]u4 = @splat(0);
+ const clen_codes_bitsize, _ = huffman.build(
+ &clen_freqs,
+ &clen_codes,
+ &clen_bits,
+ 7,
+ false,
+ );
+ const hclen = clenHlen(clen_freqs);
+
+ const dynamic_bitsize = @as(u32, 14) +
+ (4 + @as(u6, hclen)) * 3 + clen_codes_bitsize + clen_extra_bitsize +
+ dyn_lit_codes_bitsize + dyn_dist_codes_bitsize;
+ const fixed_bitsize = n: {
+ const freq7 = 1; // eos
+ var freq8: u16 = 0;
+ var freq9: u16 = 0;
+ var freq12: u16 = 0; // 7 + 5 - match freqs always have corresponding 5-bit dist freq
+ var freq13: u16 = 0; // 8 + 5
+ for (toks.lit_freqs[0..144]) |f| freq8 += f;
+ for (toks.lit_freqs[144..256]) |f| freq9 += f;
+ assert(toks.lit_freqs[256] == 1);
+ for (toks.lit_freqs[257..280]) |f| freq12 += f;
+ for (toks.lit_freqs[280..286]) |f| freq13 += f;
+ break :n @as(u32, freq7) * 7 +
+ @as(u32, freq8) * 8 + @as(u32, freq9) * 9 +
+ @as(u32, freq12) * 12 + @as(u32, freq13) * 13;
+ };
+
+ stored: {
+ for (toks.dist_freqs) |n| if (n != 0) break :stored;
+ // No need to check len frequencies since they each have a corresponding dist frequency
+ assert(for (toks.lit_freqs[257..]) |f| (if (f != 0) break false) else true);
+
+        // No matches. If the stored size is smaller than the Huffman-encoded version, the data
+        // is output in a stored block. This is not done when there are matches, since the
+        // original input would need to be kept (the window may have slid) and it may also
+        // exceed 65535 bytes. This should be OK since inputs with matches are usually more
+        // compressible anyway.
+ const stored_align_bits = -%(c.bit_writer.buffered_n +% 3);
+ const stored_bitsize = stored_align_bits + @as(u32, 32) + @as(u32, toks.n) * 8;
+ if (@min(dynamic_bitsize, fixed_bitsize) < stored_bitsize) break :stored;
+
+ try c.bit_writer.write(BlockHeader.int(.{ .kind = .stored, .final = eos }), 3);
+ try c.bit_writer.output.rebase(0, 5);
+ c.bit_writer.byteAlign();
+ c.bit_writer.output.writeInt(u16, c.buffered_tokens.n, .little) catch unreachable;
+ c.bit_writer.output.writeInt(u16, ~c.buffered_tokens.n, .little) catch unreachable;
+
+ // Relatively small buffer since regular draining will
+ // always consume slightly less than 2 << 15 bytes.
+ var vec_buf: [4][]const u8 = undefined;
+ var vec_n: usize = 0;
+ var i: usize = 0;
+
+ assert(c.buffered_tokens.pos != 0);
+ while (i != c.buffered_tokens.pos) {
+ const h: TokenBufferEntryHeader = @bitCast(toks.list[i..][0..2].*);
+ assert(h.kind == .bytes);
+
+ i += 2;
+ vec_buf[vec_n] = toks.list[i..][0..h.data];
+ i += h.data;
+
+ vec_n += 1;
+ if (i == c.buffered_tokens.pos or vec_n == vec_buf.len) {
+ try c.bit_writer.output.writeVecAll(vec_buf[0..vec_n]);
+ vec_n = 0;
+ }
+ }
+
+ toks.* = .empty;
+ return;
+ }
+
+ const lit_codes, const lit_bits, const dist_codes, const dist_bits =
+ if (dynamic_bitsize < fixed_bitsize) codes: {
+ try c.bit_writer.write(BlockHeader.Dynamic.int(.{
+ .regular = .{ .final = eos, .kind = .dynamic },
+ .hlit = @intCast(dyn_lit_len - 257),
+ .hdist = @intCast(dyn_dist_len - 1),
+ .hclen = hclen,
+ }), 17);
+ try c.bit_writer.writeClen(
+ hclen,
+ clen_values[0..clen_len],
+ clen_extra[0..clen_len],
+ clen_codes,
+ clen_bits,
+ );
+ break :codes .{
+ dyn_codes_buf[0..dyn_lit_len],
+ dyn_bits_buf[0..dyn_lit_len],
+ dyn_codes_buf[dyn_lit_len..][0..dyn_dist_len],
+ dyn_bits_buf[dyn_lit_len..][0..dyn_dist_len],
+ };
+ } else codes: {
+ try c.bit_writer.write(BlockHeader.int(.{ .final = eos, .kind = .fixed }), 3);
+ break :codes .{
+ &token.fixed_lit_codes,
+ &token.fixed_lit_bits,
+ &token.fixed_dist_codes,
+ &token.fixed_dist_bits,
+ };
+ };
+
+ var i: usize = 0;
+ while (i != toks.pos) {
+ const h: TokenBufferEntryHeader = @bitCast(toks.list[i..][0..2].*);
+ i += 2;
+ if (h.kind == .bytes) {
+ for (toks.list[i..][0..h.data]) |b| {
+ try c.bit_writer.write(lit_codes[b], lit_bits[b]);
+ }
+ i += h.data;
+ } else {
+ const dist = h.data;
+ const len = toks.list[i];
+ i += 1;
+ const dist_code = token.DistCode.fromVal(dist);
+ const len_code = token.LenCode.fromVal(len);
+ const dist_val = dist_code.toInt();
+ const lit_val = @as(u16, 257) + len_code.toInt();
+
+ var out: u48 = lit_codes[lit_val];
+ var out_bits: u6 = lit_bits[lit_val];
+ out |= @shlExact(@as(u20, len - len_code.base()), @intCast(out_bits));
+ out_bits += len_code.extraBits();
+
+ out |= @shlExact(@as(u35, dist_codes[dist_val]), out_bits);
+ out_bits += dist_bits[dist_val];
+ out |= @shlExact(@as(u48, dist - dist_code.base()), out_bits);
+ out_bits += dist_code.extraBits();
+
+ try c.bit_writer.write(out, out_bits);
+ }
+ }
+ try c.bit_writer.write(lit_codes[256], lit_bits[256]);
+
+ toks.* = .empty;
+}
+
+/// Huffman tree construction.
+///
+/// The approach for building the huffman tree is [taken from zlib]
+/// (https://github.com/madler/zlib/blob/v1.3.1/trees.c#L625) with some modifications.
+const huffman = struct {
+ const max_leafs = 286;
+ const max_nodes = max_leafs * 2;
+
+ const Node = struct {
+ freq: u16,
+ depth: u16,
+
+ pub const Index = u16;
+
+ pub fn smaller(a: Node, b: Node) bool {
+ return if (a.freq != b.freq) a.freq < b.freq else a.depth < b.depth;
+ }
+ };
+
+ fn heapSiftDown(nodes: []Node, heap: []Node.Index, start: usize) void {
+ var i = start;
+ while (true) {
+ var min = i;
+ const l = i * 2 + 1;
+ const r = l + 1;
+ min = if (l < heap.len and nodes[heap[l]].smaller(nodes[heap[min]])) l else min;
+ min = if (r < heap.len and nodes[heap[r]].smaller(nodes[heap[min]])) r else min;
+ if (i == min) break;
+ mem.swap(Node.Index, &heap[i], &heap[min]);
+ i = min;
+ }
+ }
+
+ fn heapRemoveRoot(nodes: []Node, heap: []Node.Index) void {
+ heap[0] = heap[heap.len - 1];
+ heapSiftDown(nodes, heap[0 .. heap.len - 1], 0);
+ }
+
+    /// Returns the total number of bits needed to encode `freqs`, followed by the
+    /// index of the last symbol assigned a non-zero bit length.
+    /// For `freqs[i]` == 0, `out_codes[i]` will be undefined.
+    /// It is asserted `out_bits` is zero-filled.
+    /// It is asserted `out_bits.len` is at least one if incomplete
+    /// trees are allowed and at least two otherwise.
+ pub fn build(
+ freqs: []const u16,
+ out_codes: []u16,
+ out_bits: []u4,
+ max_bits: u4,
+ incomplete_allowed: bool,
+ ) struct { u32, u16 } {
+ assert(out_codes.len - 1 >= @intFromBool(incomplete_allowed));
+ // freqs and out_codes are in the loop to assert they are all the same length
+ for (freqs, out_codes, out_bits) |_, _, n| assert(n == 0);
+ assert(out_codes.len <= @as(u16, 1) << max_bits);
+
+ // Indexes 0..freqs are leafs, indexes max_leafs.. are internal nodes.
+ var tree_nodes: [max_nodes]Node = undefined;
+ var tree_parent_nodes: [max_nodes]Node.Index = undefined;
+ var nodes_end: u16 = max_leafs;
+ // Dual-purpose buffer. Nodes are ordered by least frequency or when equal, least depth.
+ // The start is a min heap of level-zero nodes.
+ // The end is a sorted buffer of nodes with the greatest first.
+ var node_buf: [max_nodes]Node.Index = undefined;
+ var heap_end: u16 = 0;
+ var sorted_start: u16 = node_buf.len;
+
+ for (0.., freqs) |n, freq| {
+ tree_nodes[n] = .{ .freq = freq, .depth = 0 };
+ node_buf[heap_end] = @intCast(n);
+ heap_end += @intFromBool(freq != 0);
+ }
+
+        // There must always be at least one code,
+ node_buf[heap_end] = 0;
+ heap_end += @intFromBool(heap_end == 0);
+        // and at least two if an incomplete tree must be avoided.
+ if (heap_end == 1 and incomplete_allowed) {
+            @branchHint(.unlikely); // without the hint, LLVM 21 treats this branch as the more likely one
+
+            // Codes must be at least one bit long, so this is a special case.
+ out_bits[node_buf[0]] = 1;
+ out_codes[node_buf[0]] = 0;
+ return .{ freqs[node_buf[0]], node_buf[0] };
+ }
+        const last_nonzero = @max(node_buf[heap_end - 1], 1); // For heap_end > 1, the last entry is never 0
+ node_buf[heap_end] = @intFromBool(node_buf[0] == 0);
+ heap_end += @intFromBool(heap_end == 1);
+
+ // Heapify the array of frequencies
+ const heapify_final = heap_end - 1;
+ const heapify_start = (heapify_final - 1) / 2; // Parent of final node
+ var heapify_i = heapify_start;
+ while (true) {
+ heapSiftDown(&tree_nodes, node_buf[0..heap_end], heapify_i);
+ if (heapify_i == 0) break;
+ heapify_i -= 1;
+ }
+
+ // Build optimal tree. `max_bits` is not enforced yet.
+ while (heap_end > 1) {
+ const a = node_buf[0];
+ heapRemoveRoot(&tree_nodes, node_buf[0..heap_end]);
+ heap_end -= 1;
+ const b = node_buf[0];
+
+ sorted_start -= 2;
+ node_buf[sorted_start..][0..2].* = .{ b, a };
+
+ tree_nodes[nodes_end] = .{
+ .freq = tree_nodes[a].freq + tree_nodes[b].freq,
+ .depth = @max(tree_nodes[a].depth, tree_nodes[b].depth) + 1,
+ };
+ defer nodes_end += 1;
+ tree_parent_nodes[a] = nodes_end;
+ tree_parent_nodes[b] = nodes_end;
+
+ node_buf[0] = nodes_end;
+ heapSiftDown(&tree_nodes, node_buf[0..heap_end], 0);
+ }
+ sorted_start -= 1;
+ node_buf[sorted_start] = node_buf[0];
+
+ var bit_counts: [16]u16 = @splat(0);
+ buildBits(out_bits, &bit_counts, &tree_parent_nodes, node_buf[sorted_start..], max_bits);
+ return .{ buildValues(freqs, out_codes, out_bits, bit_counts), last_nonzero };
+ }
+
+ fn buildBits(
+ out_bits: []u4,
+ bit_counts: *[16]u16,
+ parent_nodes: *[max_nodes]Node.Index,
+ sorted: []Node.Index,
+ max_bits: u4,
+ ) void {
+ var internal_node_bits: [max_nodes - max_leafs]u4 = undefined;
+ var overflowed: u16 = 0;
+
+ internal_node_bits[sorted[0] - max_leafs] = 0; // root
+ for (sorted[1..]) |i| {
+ const parent_bits = internal_node_bits[parent_nodes[i] - max_leafs];
+ overflowed += @intFromBool(parent_bits == max_bits);
+ const bits = parent_bits + @intFromBool(parent_bits != max_bits);
+ bit_counts[bits] += @intFromBool(i < max_leafs);
+ (if (i >= max_leafs) &internal_node_bits[i - max_leafs] else &out_bits[i]).* = bits;
+ }
+
+ if (overflowed == 0) {
+ @branchHint(.likely);
+ return;
+ }
+
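+        // The optimal tree is deeper than `max_bits`. Redistribute the bit-length
+        // counts (as in zlib's trees.c) so every code fits within `max_bits` while
+        // the Kraft inequality stays satisfied.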
+ outer: while (true) {
+ var deepest: u4 = max_bits - 1;
+ while (bit_counts[deepest] == 0) deepest -= 1;
+ while (overflowed != 0) {
+                // Insert an internal node under the leaf and move an overflowed leaf in as its sibling
+ bit_counts[deepest] -= 1;
+ bit_counts[deepest + 1] += 2;
+                // Only the overflowed leaf is moved. Its sibling's depth is one less, yet still >= this depth.
+ bit_counts[max_bits] -= 1;
+ overflowed -= 2;
+
+ if (overflowed == 0) break :outer;
+ deepest += 1;
+ if (deepest == max_bits) continue :outer;
+ }
+ }
+
+ // Reassign bit lengths
+ assert(bit_counts[0] == 0);
+ var i: usize = 0;
+ for (1.., bit_counts[1..]) |bits, all| {
+ var remaining = all;
+ while (remaining != 0) {
+ defer i += 1;
+ if (sorted[i] >= max_leafs) continue;
+ out_bits[sorted[i]] = @intCast(bits);
+ remaining -= 1;
+ }
+ }
+ assert(for (sorted[i..]) |n| { // all leafs consumed
+ if (n < max_leafs) break false;
+ } else true);
+ }
+
+ fn buildValues(freqs: []const u16, out_codes: []u16, bits: []u4, bit_counts: [16]u16) u32 {
+ var code: u16 = 0;
+ var base: [16]u16 = undefined;
+ assert(bit_counts[0] == 0);
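+        // Compute the smallest code value for each bit length; this is the
+        // `next_code` step of RFC 1951 section 3.2.2.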
+ for (bit_counts[1..], base[1..]) |c, *b| {
+ b.* = code;
+ code +%= c;
+ code <<= 1;
+ }
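+        // Assign codes in canonical order. DEFLATE transmits Huffman codes starting
+        // from their most significant bit while the bit writer packs LSB-first, so
+        // each code is stored pre-reversed; `-%b` shifts by `16 - b`, keeping only
+        // the reversed low `b` bits.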
+ var freq_sums: [16]u16 = @splat(0);
+ for (out_codes, bits, freqs) |*c, b, f| {
+ c.* = @bitReverse(base[b]) >> -%b;
+            base[b] += 1; // For `b == 0` this is fine since the code is specified to be undefined.
+ freq_sums[b] += f;
+ }
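+        // Total encoded size in bits: the sum over each bit length of that length
+        // times the frequencies assigned to it.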
+ return @reduce(.Add, @as(@Vector(16, u32), freq_sums) * std.simd.iota(u32, 16));
+ }
+
+ test build {
+ var codes: [8]u16 = undefined;
+ var bits: [8]u4 = undefined;
+
+ const regular_freqs: [8]u16 = .{ 1, 1, 0, 8, 8, 0, 2, 4 };
+ // The optimal tree for the above frequencies is
+ // 4 1 1
+ // \ /
+ // 3 2 #
+ // \ /
+ // 2 8 8 4 #
+ // \ / \ /
+ // 1 # #
+ // \ /
+ // 0 #
+ bits = @splat(0);
+ var n, var lnz = build(®ular_freqs, &codes, &bits, 15, true);
+ codes[2] = 0;
+ codes[5] = 0;
+ try std.testing.expectEqualSlices(u4, &.{ 4, 4, 0, 2, 2, 0, 3, 2 }, &bits);
+ try std.testing.expectEqualSlices(u16, &.{
+ 0b0111, 0b1111, 0, 0b00, 0b10, 0, 0b011, 0b01,
+ }, &codes);
+ try std.testing.expectEqual(54, n);
+ try std.testing.expectEqual(7, lnz);
+ // When constrained to 3 bits, it becomes
+ // 3 1 1 2 4
+ // \ / \ /
+ // 2 8 8 # #
+ // \ / \ /
+ // 1 # #
+ // \ /
+ // 0 #
+ bits = @splat(0);
+ n, lnz = build(®ular_freqs, &codes, &bits, 3, true);
+ codes[2] = 0;
+ codes[5] = 0;
+ try std.testing.expectEqualSlices(u4, &.{ 3, 3, 0, 2, 2, 0, 3, 3 }, &bits);
+ try std.testing.expectEqualSlices(u16, &.{
+ 0b001, 0b101, 0, 0b00, 0b10, 0, 0b011, 0b111,
+ }, &codes);
+ try std.testing.expectEqual(56, n);
+ try std.testing.expectEqual(7, lnz);
+
+ // Empty tree. At least one code should be present
+ bits = @splat(0);
+ n, lnz = build(&.{ 0, 0 }, codes[0..2], bits[0..2], 15, true);
+ try std.testing.expectEqualSlices(u4, &.{ 1, 0 }, bits[0..2]);
+ try std.testing.expectEqual(0b0, codes[0]);
+ try std.testing.expectEqual(0, n);
+ try std.testing.expectEqual(0, lnz);
+
+        // Check that every frequency set which would yield an incomplete tree is completed
+ for ([_][2]u16{ .{ 0, 0 }, .{ 0, 1 }, .{ 1, 0 } }) |incomplete| {
+ // Empty tree. Both codes should be present to prevent incomplete trees
+ bits = @splat(0);
+ n, lnz = build(&incomplete, codes[0..2], bits[0..2], 15, false);
+ try std.testing.expectEqualSlices(u4, &.{ 1, 1 }, bits[0..2]);
+ try std.testing.expectEqualSlices(u16, &.{ 0b0, 0b1 }, codes[0..2]);
+ try std.testing.expectEqual(incomplete[0] + incomplete[1], n);
+ try std.testing.expectEqual(1, lnz);
+ }
+
+ try std.testing.fuzz({}, checkFuzzedBuildFreqs, .{});
}
- const buffered = me.buffered();
- const min_lookahead = Token.min_length + Token.max_length;
- const history_plus_lookahead_len = flate.history_len + min_lookahead;
- if (buffered.len < history_plus_lookahead_len) return 0;
- const lookahead = buffered[flate.history_len..];
+ fn checkFuzzedBuildFreqs(_: void, freqs: []const u8) !void {
+ @disableInstrumentation();
+ var r: Io.Reader = .fixed(freqs);
+ var freqs_limit: u16 = 65535;
+ var freqs_buf: [max_leafs]u16 = undefined;
+ var nfreqs: u15 = 0;
+
+ const params: packed struct(u8) {
+ max_bits: u4,
+ _: u3,
+ incomplete_allowed: bool,
+ } = @bitCast(r.takeByte() catch 255);
+ while (nfreqs != freqs_buf.len) {
+ const leb = r.takeLeb128(u16);
+ const f = if (leb) |f| @min(f, freqs_limit) else |e| switch (e) {
+ error.ReadFailed => unreachable,
+ error.EndOfStream => 0,
+ error.Overflow => freqs_limit,
+ };
+ freqs_buf[nfreqs] = f;
+ nfreqs += 1;
+ freqs_limit -= f;
+ if (leb == error.EndOfStream and nfreqs - 1 > @intFromBool(params.incomplete_allowed))
+ break;
+ }
+
+ var codes_buf: [max_leafs]u16 = undefined;
+ var bits_buf: [max_leafs]u4 = @splat(0);
+ const total_bits, const last_nonzero = build(
+ freqs_buf[0..nfreqs],
+ codes_buf[0..nfreqs],
+ bits_buf[0..nfreqs],
+ @max(math.log2_int_ceil(u15, nfreqs), params.max_bits),
+ params.incomplete_allowed,
+ );
+
+ var has_bitlen_one: bool = false;
+ var expected_total_bits: u32 = 0;
+ var expected_last_nonzero: ?u16 = null;
+ var weighted_sum: u32 = 0;
+ for (freqs_buf[0..nfreqs], bits_buf[0..nfreqs], 0..) |f, nb, i| {
+ has_bitlen_one = has_bitlen_one or nb == 1;
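+            // Kraft sum scaled by 2^15: a code of length `nb` contributes 2^(15 - nb);
+            // the mask zeroes the contribution of unused symbols (nb == 0).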
+ weighted_sum += @shlExact(@as(u16, 1), 15 - nb) & ((1 << 15) - 1);
+ expected_total_bits += @as(u32, f) * nb;
+ if (nb != 0) expected_last_nonzero = @intCast(i);
+ }
+
+ errdefer std.log.err(
+ \\ params: {}
+ \\ freqs: {any}
+ \\ bits: {any}
+ \\ # freqs: {}
+ \\ max bits: {}
+ \\ weighted sum: {}
+ \\ has_bitlen_one: {}
+ \\ expected/actual total bits: {}/{}
+ \\ expected/actual last nonzero: {?}/{}
+ ++ "\n", .{
+ params,
+ freqs_buf[0..nfreqs],
+ bits_buf[0..nfreqs],
+ nfreqs,
+ @max(math.log2_int_ceil(u15, nfreqs), params.max_bits),
+ weighted_sum,
+ has_bitlen_one,
+ expected_total_bits,
+ total_bits,
+ expected_last_nonzero,
+ last_nonzero,
+ });
+
+ try std.testing.expectEqual(expected_total_bits, total_bits);
+ try std.testing.expectEqual(expected_last_nonzero, last_nonzero);
+ if (weighted_sum > 1 << 15)
+ return error.OversubscribedHuffmanTree;
+ if (weighted_sum < 1 << 15 and
+ !(params.incomplete_allowed and has_bitlen_one and weighted_sum == 1 << 14))
+ return error.IncompleteHuffmanTree;
+ }
+};
- // TODO tokenize
- _ = lookahead;
- //c.hasher.update(lookahead[0..n]);
- @panic("TODO");
+test {
+ _ = huffman;
}
-pub fn end(c: *Compress) !void {
- try endUnflushed(c);
- const out = c.block_writer.output;
- try out.flush();
+/// [0] is a gradient where the probability of lower values decreases across it
+/// [1] is completely random and hence incompressible
+fn testingFreqBufs() !*[2][65536]u8 {
+ const fbufs = try std.testing.allocator.create([2][65536]u8);
+ var prng: std.Random.DefaultPrng = .init(std.testing.random_seed);
+ prng.random().bytes(&fbufs[0]);
+ prng.random().bytes(&fbufs[1]);
+ for (0.., &fbufs[0], fbufs[1]) |i, *grad, rand| {
+ const prob = @as(u8, @intCast(255 - i / (fbufs[0].len * 256)));
+ grad.* /= @max(1, rand / @max(1, prob));
+ }
+ return fbufs;
}
-pub fn endUnflushed(c: *Compress) !void {
- while (c.writer.end != 0) _ = try drain(&c.writer, &.{""}, 1);
- c.state = .ended;
+fn testingCheckDecompressedMatches(
+ flate_bytes: []const u8,
+ expected_size: u32,
+ expected_hash: flate.Container.Hasher,
+) !void {
+ const container: flate.Container = expected_hash;
+ var data_hash: flate.Container.Hasher = .init(container);
+ var data_size: u32 = 0;
+ var flate_r: Io.Reader = .fixed(flate_bytes);
+ var deflate_buf: [flate.max_window_len]u8 = undefined;
+ var deflate: flate.Decompress = .init(&flate_r, container, &deflate_buf);
- const out = c.block_writer.output;
+ while (deflate.reader.peekGreedy(1)) |bytes| {
+ data_size += @intCast(bytes.len);
+ data_hash.update(bytes);
+ deflate.reader.toss(bytes.len);
+ } else |e| switch (e) {
+ error.ReadFailed => return deflate.err.?,
+ error.EndOfStream => {},
+ }
- // TODO flush tokens
+ try testingCheckContainerHash(
+ expected_size,
+ expected_hash,
+ data_hash,
+ data_size,
+ deflate.container_metadata,
+ );
+}
- switch (c.hasher) {
- .gzip => |*gzip| {
- // GZIP 8 bytes footer
- // - 4 bytes, CRC32 (CRC-32)
- // - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32
- const footer = try out.writableArray(8);
- std.mem.writeInt(u32, footer[0..4], gzip.crc.final(), .little);
- std.mem.writeInt(u32, footer[4..8], @truncate(gzip.count), .little);
+fn testingCheckContainerHash(
+ expected_size: u32,
+ expected_hash: flate.Container.Hasher,
+ actual_hash: flate.Container.Hasher,
+ actual_size: u32,
+ actual_meta: flate.Container.Metadata,
+) !void {
+ try std.testing.expectEqual(expected_size, actual_size);
+ switch (actual_hash) {
+ .raw => {},
+ .gzip => |gz| {
+ const expected_crc = expected_hash.gzip.crc.final();
+ try std.testing.expectEqual(expected_size, actual_meta.gzip.count);
+ try std.testing.expectEqual(expected_crc, gz.crc.final());
+ try std.testing.expectEqual(expected_crc, actual_meta.gzip.crc);
},
- .zlib => |*zlib| {
- // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
- // 4 bytes of ADLER32 (Adler-32 checksum)
- // Checksum value of the uncompressed data (excluding any
- // dictionary data) computed according to Adler-32
- // algorithm.
- std.mem.writeInt(u32, try out.writableArray(4), zlib.adler, .big);
+ .zlib => |zl| {
+ const expected_adler = expected_hash.zlib.adler;
+ try std.testing.expectEqual(expected_adler, zl.adler);
+ try std.testing.expectEqual(expected_adler, actual_meta.zlib.adler);
},
- .raw => {},
}
}
-pub const Simple = struct {
- /// Note that store blocks are limited to 65535 bytes.
- buffer: []u8,
- wp: usize,
- block_writer: BlockWriter,
- hasher: Container.Hasher,
- strategy: Strategy,
+const PackedContainer = packed struct(u2) {
+ raw: bool,
+ other: enum(u1) { gzip, zlib },
+
+ pub fn val(c: @This()) flate.Container {
+ return if (c.raw) .raw else switch (c.other) {
+ .gzip => .gzip,
+ .zlib => .zlib,
+ };
+ }
+};
+
+test Compress {
+ const fbufs = try testingFreqBufs();
+ defer if (!builtin.fuzz) std.testing.allocator.destroy(fbufs);
+ try std.testing.fuzz(fbufs, testFuzzedCompressInput, .{});
+}
+
+fn testFuzzedCompressInput(fbufs: *const [2][65536]u8, input: []const u8) !void {
+ var in: Io.Reader = .fixed(input);
+ var opts: packed struct(u51) {
+ container: PackedContainer,
+ buf_size: u16,
+ good: u8,
+ nice: u8,
+ lazy: u8,
+ /// Not a `u16` to limit it for performance
+ chain: u9,
+ } = @bitCast(in.takeLeb128(u51) catch 0);
+ var expected_hash: flate.Container.Hasher = .init(opts.container.val());
+ var expected_size: u32 = 0;
+
+ var flate_buf: [128 * 1024]u8 = undefined;
+ var flate_w: Writer = .fixed(&flate_buf);
+ var deflate_buf: [flate.max_window_len * 2]u8 = undefined;
+ var deflate_w = try Compress.init(
+ &flate_w,
+ deflate_buf[0 .. flate.max_window_len + @as(usize, opts.buf_size)],
+ opts.container.val(),
+ .{
+ .good = @as(u16, opts.good) + 3,
+ .nice = @as(u16, opts.nice) + 3,
+ .lazy = @as(u16, @min(opts.lazy, opts.nice)) + 3,
+ .chain = @max(1, opts.chain, @as(u8, 4) * @intFromBool(opts.good <= opts.lazy)),
+ },
+ );
+
+    // No more bytes than this are written, both so that this run does not
+    // take too long and so that `flate_buf` does not run out of space.
+ const flate_buf_blocks = flate_buf.len / block_tokens;
+    // Allow a max overhead of 64 bytes per block since the implementation does not guarantee it
+    // writes stored blocks when optimal. This comes from taking less than 32 bytes to write an
+    // optimal dynamic block header of mostly bitlen 8 codes and the end of block literal, plus
+    // `(65536 / 256) / 8`, which is the maximum number of extra bytes from bitlen 9 codes. An
+    // extra 32 bytes is reserved on top of that for container headers and footers.
+ const max_size = flate_buf.len - (flate_buf_blocks * 64 + 32);
+
+ while (true) {
+ const data: packed struct(u36) {
+ is_rebase: bool,
+ is_bytes: bool,
+ params: packed union {
+ copy: packed struct(u34) {
+ len_lo: u5,
+ dist: u15,
+ len_hi: u4,
+ _: u10,
+ },
+ bytes: packed struct(u34) {
+ kind: enum(u1) { gradient, random },
+ off_hi: u4,
+ len_lo: u10,
+ off_mi: u4,
+ len_hi: u5,
+ off_lo: u8,
+ _: u2,
+ },
+ rebase: packed struct(u34) {
+ preserve: u17,
+ capacity: u17,
+ },
+ },
+ } = @bitCast(in.takeLeb128(u36) catch |e| switch (e) {
+ error.ReadFailed => unreachable,
+ error.Overflow => 0,
+ error.EndOfStream => break,
+ });
+
+ const buffered = deflate_w.writer.buffered();
+        // Required for repeating patterns, and because writing directly from `buffered` is illegal
+ var copy_buf: [512]u8 = undefined;
+
+ if (data.is_rebase) {
+ const usable_capacity = deflate_w.writer.buffer.len - rebase_reserved_capacity;
+ const preserve = @min(data.params.rebase.preserve, usable_capacity);
+ const capacity = @min(data.params.rebase.capacity, usable_capacity -
+ @max(rebase_min_preserve, preserve));
+ try deflate_w.writer.rebase(preserve, capacity);
+ continue;
+ }
+
+ const max_bytes = max_size -| expected_size;
+ const bytes = if (!data.is_bytes and buffered.len != 0) bytes: {
+ const dist = @min(buffered.len, @as(u32, data.params.copy.dist) + 1);
+ const len = @min(
+ @max(@shlExact(@as(u9, data.params.copy.len_hi), 5) | data.params.copy.len_lo, 1),
+ max_bytes,
+ );
+            // Reuse the implementation's history. Otherwise our own would need to be maintained.
+ const bytes_start = buffered[buffered.len - dist ..];
+ const history_bytes = bytes_start[0..@min(bytes_start.len, len)];
+
+ @memcpy(copy_buf[0..history_bytes.len], history_bytes);
+ const new_history = len - history_bytes.len;
+ if (history_bytes.len != len) for ( // check needed for `- dist`
+ copy_buf[history_bytes.len..][0..new_history],
+ copy_buf[history_bytes.len - dist ..][0..new_history],
+ ) |*next, prev| {
+ next.* = prev;
+ };
+ break :bytes copy_buf[0..len];
+ } else bytes: {
+ const off = @shlExact(@as(u16, data.params.bytes.off_hi), 12) |
+ @shlExact(@as(u16, data.params.bytes.off_mi), 8) |
+ data.params.bytes.off_lo;
+ const len = @shlExact(@as(u16, data.params.bytes.len_hi), 10) |
+ data.params.bytes.len_lo;
+ const fbuf = &fbufs[@intFromEnum(data.params.bytes.kind)];
+ break :bytes fbuf[off..][0..@min(len, fbuf.len - off, max_bytes)];
+ };
+ assert(bytes.len <= max_bytes);
+ try deflate_w.writer.writeAll(bytes);
+ expected_hash.update(bytes);
+ expected_size += @intCast(bytes.len);
+ }
+
+ try deflate_w.writer.flush();
+ try testingCheckDecompressedMatches(flate_w.buffered(), expected_size, expected_hash);
+}
+
+/// Does not compress data
+pub const Raw = struct {
+    /// After `flush` is called, all vtable calls will result in `error.WriteFailed`.
+ writer: Writer,
+ output: *Writer,
+ hasher: flate.Container.Hasher,
- pub const Strategy = enum { huffman, store };
+ const max_block_size: u16 = 65535;
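+    // Header of a non-final stored block of maximum size: LEN = 65535, NLEN = 0.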
+ const full_header: [5]u8 = .{
+ BlockHeader.int(.{ .final = false, .kind = .stored }),
+ 255,
+ 255,
+ 0,
+ 0,
+ };
- pub fn init(output: *Writer, buffer: []u8, container: Container, strategy: Strategy) !Simple {
- const header = container.header();
- try output.writeAll(header);
+    /// While there is no minimum buffer size, a buffer of at least
+    /// `flate.max_window_len` is recommended for optimal output.
+ pub fn init(output: *Writer, buffer: []u8, container: flate.Container) Writer.Error!Raw {
+ try output.writeAll(container.header());
return .{
- .buffer = buffer,
- .wp = 0,
- .block_writer = .init(output),
+ .writer = .{
+ .buffer = buffer,
+ .vtable = &.{
+ .drain = Raw.drain,
+ .flush = Raw.flush,
+ .rebase = Raw.rebase,
+ },
+ },
+ .output = output,
.hasher = .init(container),
- .strategy = strategy,
};
}
- pub fn flush(self: *Simple) !void {
- try self.flushBuffer(false);
- try self.block_writer.storedBlock("", false);
- try self.block_writer.flush();
+ fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
+ errdefer w.* = .failing;
+ const r: *Raw = @fieldParentPtr("writer", w);
+ const min_block = @min(w.buffer.len, max_block_size);
+ const pattern = data[data.len - 1];
+ var partial_header: [5]u8 = undefined;
+
+ var vecs: [16][]const u8 = undefined;
+ var vecs_n: usize = 0;
+ const data_bytes = Writer.countSplat(data, splat);
+ const total_bytes = w.end + data_bytes;
+ var rem_bytes = total_bytes;
+ var rem_splat = splat;
+ var rem_data = data;
+ var rem_data_elem: []const u8 = w.buffered();
+
+ assert(rem_bytes > min_block);
+ while (rem_bytes > min_block) { // not >= to allow `min_block` blocks to be marked as final
+ // also, it handles the case of `min_block` being zero (no buffer)
+ const block_size: u16 = @min(rem_bytes, max_block_size);
+ rem_bytes -= block_size;
+
+ if (vecs_n == vecs.len) {
+ try r.output.writeVecAll(&vecs);
+ vecs_n = 0;
+ }
+ vecs[vecs_n] = if (block_size == 65535)
+ &full_header
+ else header: {
+ partial_header[0] = BlockHeader.int(.{ .final = false, .kind = .stored });
+ mem.writeInt(u16, partial_header[1..3], block_size, .little);
+ mem.writeInt(u16, partial_header[3..5], ~block_size, .little);
+ break :header &partial_header;
+ };
+ vecs_n += 1;
+
+ var block_limit: Io.Limit = .limited(block_size);
+ while (true) {
+ if (vecs_n == vecs.len) {
+ try r.output.writeVecAll(&vecs);
+ vecs_n = 0;
+ }
+
+ const vec = block_limit.sliceConst(rem_data_elem);
+ vecs[vecs_n] = vec;
+ vecs_n += 1;
+ r.hasher.update(vec);
+
+ const is_pattern = rem_splat != splat and vec.len == pattern.len;
+ if (is_pattern) assert(pattern.len != 0); // exceeded countSplat
+
+ if (!is_pattern or rem_splat == 0 or pattern.len > @intFromEnum(block_limit) / 2) {
+ rem_data_elem = rem_data_elem[vec.len..];
+ block_limit = block_limit.subtract(vec.len).?;
+
+ if (rem_data_elem.len == 0) {
+ rem_data_elem = rem_data[0];
+ if (rem_data.len != 1) {
+ rem_data = rem_data[1..];
+ } else if (rem_splat != 0) {
+ rem_splat -= 1;
+ } else {
+ // All of `data` has been consumed.
+ assert(block_limit == .nothing);
+ assert(rem_bytes == 0);
+ // Since `rem_bytes` and `block_limit` are zero, these won't be used.
+ rem_data = undefined;
+ rem_data_elem = undefined;
+ rem_splat = undefined;
+ }
+ }
+ if (block_limit == .nothing) break;
+ } else {
+ const out_splat = @intFromEnum(block_limit) / pattern.len;
+ assert(out_splat >= 2);
+
+ try r.output.writeSplatAll(vecs[0..vecs_n], out_splat);
+ for (1..out_splat) |_| r.hasher.update(vec);
+
+ vecs_n = 0;
+ block_limit = block_limit.subtract(pattern.len * out_splat).?;
+ if (rem_splat >= out_splat) {
+                    // `out_splat` contains `rem_data`, however one more needs to be
+                    // subtracted anyway since the next pattern is also being taken.
+ rem_splat -= out_splat;
+ } else {
+ // All of `data` has been consumed.
+ assert(block_limit == .nothing);
+ assert(rem_bytes == 0);
+ // Since `rem_bytes` and `block_limit` are zero, these won't be used.
+ rem_data = undefined;
+ rem_data_elem = undefined;
+ rem_splat = undefined;
+ }
+ if (block_limit == .nothing) break;
+ }
+ }
+ }
+
+ if (vecs_n != 0) { // can be the case if a splat was sent
+ try r.output.writeVecAll(vecs[0..vecs_n]);
+ }
+
+ if (rem_bytes > data_bytes) {
+ assert(rem_bytes - data_bytes == rem_data_elem.len);
+ assert(&rem_data_elem[0] == &w.buffer[total_bytes - rem_bytes]);
+ }
+ return w.consume(total_bytes - rem_bytes);
+ }
+
+ fn flush(w: *Writer) Writer.Error!void {
+ defer w.* = .failing;
+ try Raw.rebaseInner(w, 0, w.buffer.len, true);
}
- pub fn finish(self: *Simple) !void {
- try self.flushBuffer(true);
- try self.block_writer.flush();
- try self.hasher.container().writeFooter(&self.hasher, self.block_writer.output);
+ fn rebase(w: *Writer, preserve: usize, capacity: usize) Writer.Error!void {
+ errdefer w.* = .failing;
+ try Raw.rebaseInner(w, preserve, capacity, false);
}
- fn flushBuffer(self: *Simple, final: bool) !void {
- const buf = self.buffer[0..self.wp];
- switch (self.strategy) {
- .huffman => try self.block_writer.huffmanBlock(buf, final),
- .store => try self.block_writer.storedBlock(buf, final),
+ fn rebaseInner(w: *Writer, preserve: usize, capacity: usize, eos: bool) Writer.Error!void {
+ const r: *Raw = @fieldParentPtr("writer", w);
+ assert(preserve + capacity <= w.buffer.len);
+ if (eos) assert(capacity == w.buffer.len);
+
+ var partial_header: [5]u8 = undefined;
+ var footer_buf: [8]u8 = undefined;
+ const preserved = @min(w.end, preserve);
+ var remaining = w.buffer[0 .. w.end - preserved];
+
+ var vecs: [16][]const u8 = undefined;
+ var vecs_n: usize = 0;
+ while (remaining.len > max_block_size) { // not >= so there is always a block down below
+ if (vecs_n == vecs.len) {
+ try r.output.writeVecAll(&vecs);
+ vecs_n = 0;
+ }
+ vecs[vecs_n + 0] = &full_header;
+ vecs[vecs_n + 1] = remaining[0..max_block_size];
+ r.hasher.update(vecs[vecs_n + 1]);
+ vecs_n += 2;
+ remaining = remaining[max_block_size..];
+ }
+
+ // eos check required for empty block
+ if (w.buffer.len - (remaining.len + preserved) < capacity or eos) {
+ // A partial write is necessary to reclaim enough buffer space
+ const block_size: u16 = @intCast(remaining.len);
+ partial_header[0] = BlockHeader.int(.{ .final = eos, .kind = .stored });
+ mem.writeInt(u16, partial_header[1..3], block_size, .little);
+ mem.writeInt(u16, partial_header[3..5], ~block_size, .little);
+
+ if (vecs_n == vecs.len) {
+ try r.output.writeVecAll(&vecs);
+ vecs_n = 0;
+ }
+ vecs[vecs_n + 0] = &partial_header;
+ vecs[vecs_n + 1] = remaining[0..block_size];
+ r.hasher.update(vecs[vecs_n + 1]);
+ vecs_n += 2;
+ remaining = remaining[block_size..];
+ assert(remaining.len == 0);
+
+ if (eos and r.hasher != .raw) {
+ // the footer is done here instead of `flush` so it can be included in the vector
+ var footer_w: Writer = .fixed(&footer_buf);
+ r.hasher.writeFooter(&footer_w) catch unreachable;
+ assert(footer_w.end != 0);
+
+ if (vecs_n == vecs.len) {
+ try r.output.writeVecAll(&vecs);
+ return r.output.writeAll(footer_w.buffered());
+ } else {
+ vecs[vecs_n] = footer_w.buffered();
+ vecs_n += 1;
+ }
+ }
}
- self.wp = 0;
+
+ try r.output.writeVecAll(vecs[0..vecs_n]);
+ _ = w.consume(w.end - preserved - remaining.len);
}
};
-test "generate a Huffman code from an array of frequencies" {
- var freqs: [19]u16 = [_]u16{
- 8, // 0
- 1, // 1
- 1, // 2
- 2, // 3
- 5, // 4
- 10, // 5
- 9, // 6
- 1, // 7
- 0, // 8
- 0, // 9
- 0, // 10
- 0, // 11
- 0, // 12
- 0, // 13
- 0, // 14
- 0, // 15
- 1, // 16
- 3, // 17
- 5, // 18
+test Raw {
+ const data_buf = try std.testing.allocator.create([4 * 65536]u8);
+ defer if (!builtin.fuzz) std.testing.allocator.destroy(data_buf);
+ var prng: std.Random.DefaultPrng = .init(std.testing.random_seed);
+ prng.random().bytes(data_buf);
+ try std.testing.fuzz(data_buf, testFuzzedRawInput, .{});
+}
+
+fn countVec(data: []const []const u8) usize {
+ var bytes: usize = 0;
+ for (data) |d| bytes += d.len;
+ return bytes;
+}
+
+fn testFuzzedRawInput(data_buf: *const [4 * 65536]u8, input: []const u8) !void {
+ const HashedStoreWriter = struct {
+ writer: Writer,
+ state: enum {
+ header,
+ block_header,
+ block_body,
+ final_block_body,
+ footer,
+ end,
+ },
+ block_remaining: u16,
+ container: flate.Container,
+ data_hash: flate.Container.Hasher,
+ data_size: usize,
+ footer_hash: u32,
+ footer_size: u32,
+
+ pub fn init(buf: []u8, container: flate.Container) @This() {
+ return .{
+ .writer = .{
+ .vtable = &.{
+ .drain = @This().drain,
+ .flush = @This().flush,
+ },
+ .buffer = buf,
+ },
+ .state = .header,
+ .block_remaining = 0,
+ .container = container,
+ .data_hash = .init(container),
+ .data_size = 0,
+ .footer_hash = undefined,
+ .footer_size = undefined,
+ };
+ }
+
+        /// Note that this implementation is somewhat dependent on the implementation of
+        /// `Raw`, in that it expects headers / footers to be contiguous in data elements. It
+        /// also expects the header to be the same as `flate.Container.header` and not
+        /// for multiple streams to be concatenated.
+ fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
+ errdefer w.* = .failing;
+ var h: *@This() = @fieldParentPtr("writer", w);
+
+ var rem_splat = splat;
+ var rem_data = data;
+ var rem_data_elem: []const u8 = w.buffered();
+
+ data_loop: while (true) {
+ const wanted = switch (h.state) {
+ .header => h.container.headerSize(),
+ .block_header => 5,
+ .block_body, .final_block_body => h.block_remaining,
+ .footer => h.container.footerSize(),
+ .end => 1,
+ };
+
+ if (wanted != 0) {
+ while (rem_data_elem.len == 0) {
+ rem_data_elem = rem_data[0];
+ if (rem_data.len != 1) {
+ rem_data = rem_data[1..];
+ } else {
+ if (rem_splat == 0) {
+ break :data_loop;
+ } else {
+ rem_splat -= 1;
+ }
+ }
+ }
+ }
+
+ const bytes = Io.Limit.limited(wanted).sliceConst(rem_data_elem);
+ rem_data_elem = rem_data_elem[bytes.len..];
+
+ switch (h.state) {
+ .header => {
+ if (bytes.len < wanted)
+ return error.WriteFailed; // header eos
+ if (!mem.eql(u8, bytes, h.container.header()))
+ return error.WriteFailed; // wrong header
+ h.state = .block_header;
+ },
+ .block_header => {
+ if (bytes.len < wanted)
+ return error.WriteFailed; // store block header eos
+ const header: BlockHeader = @bitCast(@as(u3, @truncate(bytes[0])));
+ if (header.kind != .stored)
+ return error.WriteFailed; // non-store block
+ const len = mem.readInt(u16, bytes[1..3], .little);
+ const nlen = mem.readInt(u16, bytes[3..5], .little);
+ if (nlen != ~len)
+ return error.WriteFailed; // wrong nlen
+ h.block_remaining = len;
+ h.state = if (!header.final) .block_body else .final_block_body;
+ },
+ .block_body, .final_block_body => {
+ h.data_hash.update(bytes);
+ h.data_size += bytes.len;
+ h.block_remaining -= @intCast(bytes.len);
+ if (h.block_remaining == 0) {
+ h.state = if (h.state != .final_block_body) .block_header else .footer;
+ }
+ },
+ .footer => {
+ if (bytes.len < wanted)
+ return error.WriteFailed; // footer eos
+ switch (h.container) {
+ .raw => {},
+ .gzip => {
+ h.footer_hash = mem.readInt(u32, bytes[0..4], .little);
+ h.footer_size = mem.readInt(u32, bytes[4..8], .little);
+ },
+ .zlib => {
+ h.footer_hash = mem.readInt(u32, bytes[0..4], .big);
+ },
+ }
+ h.state = .end;
+ },
+ .end => return error.WriteFailed, // data past end
+ }
+ }
+
+ w.end = 0;
+ return Writer.countSplat(data, splat);
+ }
+
+ fn flush(w: *Writer) Writer.Error!void {
+ defer w.* = .failing; // Clears buffer even if state hasn't reached `end`
+ _ = try @This().drain(w, &.{""}, 0);
+ }
};
- var codes: [19]HuffmanEncoder.Code = undefined;
- var enc: HuffmanEncoder = .{
- .codes = &codes,
- .freq_cache = undefined,
- .bit_count = undefined,
- .lns = undefined,
- .lfs = undefined,
+ var in: Io.Reader = .fixed(input);
+ const opts: packed struct(u19) {
+ container: PackedContainer,
+ buf_len: u17,
+ } = @bitCast(in.takeLeb128(u19) catch 0);
+ var output: HashedStoreWriter = .init(&.{}, opts.container.val());
+ var r_buf: [2 * 65536]u8 = undefined;
+ var r: Raw = try .init(
+ &output.writer,
+ r_buf[0 .. opts.buf_len +% flate.max_window_len],
+ opts.container.val(),
+ );
+
+ var data_base: u18 = 0;
+ var expected_hash: flate.Container.Hasher = .init(opts.container.val());
+ var expected_size: u32 = 0;
+ var vecs: [32][]const u8 = undefined;
+ var vecs_n: usize = 0;
+
+ while (in.seek != in.end) {
+ const VecInfo = packed struct(u58) {
+ output: bool,
+            /// If set, `data_len` and `splat` are reinterpreted as `preserve_len`
+            /// and `capacity` respectively and `output` is treated as set.
+ rebase: bool,
+ block_aligning_len: bool,
+ block_aligning_splat: bool,
+ data_len: u18,
+ splat: u18,
+ data_off: u18,
+ };
+ var vec_info: VecInfo = @bitCast(in.takeLeb128(u58) catch |e| switch (e) {
+ error.ReadFailed => unreachable,
+ error.Overflow, error.EndOfStream => 0,
+ });
+
+ {
+ const buffered = r.writer.buffered().len + countVec(vecs[0..vecs_n]);
+ const to_align = mem.alignForwardAnyAlign(usize, buffered, Raw.max_block_size) - buffered;
+ assert((buffered + to_align) % Raw.max_block_size == 0);
+
+ if (vec_info.block_aligning_len) {
+ vec_info.data_len = @intCast(to_align);
+ } else if (vec_info.block_aligning_splat and vec_info.data_len != 0 and
+ to_align % vec_info.data_len == 0)
+ {
+ vec_info.splat = @divExact(@as(u18, @intCast(to_align)), vec_info.data_len) -% 1;
+ }
+ }
+
+ var splat = if (vec_info.output and !vec_info.rebase) vec_info.splat +% 1 else 1;
+ add_vec: {
+ if (vec_info.rebase) break :add_vec;
+ if (expected_size +| math.mulWide(u18, vec_info.data_len, splat) >
+ 10 * (1 << 16))
+ {
+ // Skip this vector to avoid this test taking too long.
+                // 10 maximum-sized blocks is chosen as the limit since it is two more
+                // than the maximum the implementation can output in one drain.
+ splat = 1;
+ break :add_vec;
+ }
+
+ vecs[vecs_n] = data_buf[@min(
+ data_base +% vec_info.data_off,
+ data_buf.len - vec_info.data_len,
+ )..][0..vec_info.data_len];
+
+ data_base +%= vec_info.data_len +% 3; // extra 3 to help catch aliasing bugs
+
+ for (0..splat) |_| expected_hash.update(vecs[vecs_n]);
+ expected_size += @as(u32, @intCast(vecs[vecs_n].len)) * splat;
+ vecs_n += 1;
+ }
+
+ const want_drain = vecs_n == vecs.len or vec_info.output or vec_info.rebase or
+ in.seek == in.end;
+ if (want_drain and vecs_n != 0) {
+ try r.writer.writeSplatAll(vecs[0..vecs_n], splat);
+ vecs_n = 0;
+ } else assert(splat == 1);
+
+ if (vec_info.rebase) {
+ try r.writer.rebase(vec_info.data_len, @min(
+ r.writer.buffer.len -| vec_info.data_len,
+ vec_info.splat,
+ ));
+ }
+ }
+
+ try r.writer.flush();
+ try output.writer.flush();
+
+ try std.testing.expectEqual(.end, output.state);
+ try std.testing.expectEqual(expected_size, output.data_size);
+ switch (output.data_hash) {
+ .raw => {},
+ .gzip => |gz| {
+ const expected_crc = expected_hash.gzip.crc.final();
+ try std.testing.expectEqual(expected_crc, gz.crc.final());
+ try std.testing.expectEqual(expected_crc, output.footer_hash);
+ try std.testing.expectEqual(expected_size, output.footer_size);
+ },
+ .zlib => |zl| {
+ const expected_adler = expected_hash.zlib.adler;
+ try std.testing.expectEqual(expected_adler, zl.adler);
+ try std.testing.expectEqual(expected_adler, output.footer_hash);
+ },
+ }
+}
+
+/// Only performs Huffman compression on the data; no match searching is performed.
+pub const Huffman = struct {
+ writer: Writer,
+ bit_writer: BitWriter,
+ hasher: flate.Container.Hasher,
+
+ const max_tokens: u16 = 65535 - 1; // one is reserved for EOF
+
+    /// While there is no minimum buffer size, a buffer of at least
+    /// `flate.max_window_len` is recommended to improve compression.
+    ///
+    /// It is asserted `output` has a capacity of more than 8 bytes.
+ pub fn init(output: *Writer, buffer: []u8, container: flate.Container) Writer.Error!Huffman {
+ assert(output.buffer.len > 8);
+
+ try output.writeAll(container.header());
+ return .{
+ .writer = .{
+ .buffer = buffer,
+ .vtable = &.{
+ .drain = Huffman.drain,
+ .flush = Huffman.flush,
+ .rebase = Huffman.rebase,
+ },
+ },
+ .bit_writer = .init(output),
+ .hasher = .init(container),
+ };
+ }
+
+ fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
+ {
+ //std.debug.print("drain {} (buffered)", .{w.buffered().len});
+ //for (data) |d| std.debug.print("\n\t+ {}", .{d.len});
+ //std.debug.print(" x {}\n\n", .{splat});
+ }
+
+ const h: *Huffman = @fieldParentPtr("writer", w);
+ const min_block = @min(w.buffer.len, max_tokens);
+ const pattern = data[data.len - 1];
+
+ const data_bytes = Writer.countSplat(data, splat);
+ const total_bytes = w.end + data_bytes;
+ var rem_bytes = total_bytes;
+ var rem_splat = splat;
+ var rem_data = data;
+ var rem_data_elem: []const u8 = w.buffered();
+
+ assert(rem_bytes > min_block);
+ while (rem_bytes > min_block) { // not >= to allow `min_block` blocks to be marked as final
+ // also, it handles the case of `min_block` being zero (no buffer)
+ const block_size: u16 = @min(rem_bytes, max_tokens);
+ rem_bytes -= block_size;
+
+ // Count frequencies
+ comptime assert(max_tokens != 65535);
+ var freqs: [257]u16 = @splat(0);
+ freqs[256] = 1;
+
+ const start_splat = rem_splat;
+ const start_data = rem_data;
+ const start_data_elem = rem_data_elem;
+
+ var block_limit: Io.Limit = .limited(block_size);
+ while (true) {
+ const bytes = block_limit.sliceConst(rem_data_elem);
+ const is_pattern = rem_splat != splat and bytes.len == pattern.len;
+
+ const mul = if (!is_pattern) 1 else @intFromEnum(block_limit) / pattern.len;
+ assert(mul != 0);
+ if (is_pattern) assert(mul <= rem_splat + 1); // one more for `rem_data`
+
+ for (bytes) |b| freqs[b] += @intCast(mul);
+ rem_data_elem = rem_data_elem[bytes.len..];
+ block_limit = block_limit.subtract(bytes.len * mul).?;
+
+ if (rem_data_elem.len == 0) {
+ rem_data_elem = rem_data[0];
+ if (rem_data.len != 1) {
+ rem_data = rem_data[1..];
+ } else if (rem_splat >= mul) {
+                        // If what was counted was not the pattern, `mul` is always one; otherwise,
+                        // `mul` contains `rem_data`, however one more needs to be subtracted anyway
+                        // since the next pattern is also being taken.
+ rem_splat -= mul;
+ } else {
+ // All of `data` has been consumed.
+ assert(block_limit == .nothing);
+ assert(rem_bytes == 0);
+ // Since `rem_bytes` and `block_limit` are zero, these won't be used.
+ rem_data = undefined;
+ rem_data_elem = undefined;
+ rem_splat = undefined;
+ }
+ }
+ if (block_limit == .nothing) break;
+ }
+
+ // Output block
+ rem_splat = start_splat;
+ rem_data = start_data;
+ rem_data_elem = start_data_elem;
+ block_limit = .limited(block_size);
+
+ var codes_buf: CodesBuf = .init;
+ if (try h.outputHeader(&freqs, &codes_buf, block_size, false)) |table| {
+ while (true) {
+ const bytes = block_limit.sliceConst(rem_data_elem);
+ rem_data_elem = rem_data_elem[bytes.len..];
+ block_limit = block_limit.subtract(bytes.len).?;
+
+ h.hasher.update(bytes);
+ for (bytes) |b| {
+ try h.bit_writer.write(table.codes[b], table.bits[b]);
+ }
+
+ if (rem_data_elem.len == 0) {
+ rem_data_elem = rem_data[0];
+ if (rem_data.len != 1) {
+ rem_data = rem_data[1..];
+ } else if (rem_splat != 0) {
+ rem_splat -= 1;
+ } else {
+ // All of `data` has been consumed.
+ assert(block_limit == .nothing);
+ assert(rem_bytes == 0);
+ // Since `rem_bytes` and `block_limit` are zero, these won't be used.
+ rem_data = undefined;
+ rem_data_elem = undefined;
+ rem_splat = undefined;
+ }
+ }
+ if (block_limit == .nothing) break;
+ }
+ try h.bit_writer.write(table.codes[256], table.bits[256]);
+ } else while (true) {
+ // Store block
+
+ // Write data that is not a full vector element
+ const in_pattern = rem_splat != splat;
+ const vec_elem_i, const in_data =
+ @subWithOverflow(data.len - (rem_data.len - @intFromBool(in_pattern)), 1);
+ const is_elem = in_data == 0 and data[vec_elem_i].len == rem_data_elem.len;
+
+ if (!is_elem or rem_data_elem.len > @intFromEnum(block_limit)) {
+ block_limit = block_limit.subtract(rem_data_elem.len) orelse {
+ try h.bit_writer.output.writeAll(rem_data_elem[0..@intFromEnum(block_limit)]);
+ h.hasher.update(rem_data_elem[0..@intFromEnum(block_limit)]);
+ rem_data_elem = rem_data_elem[@intFromEnum(block_limit)..];
+ assert(rem_data_elem.len != 0);
+ break;
+ };
+ try h.bit_writer.output.writeAll(rem_data_elem);
+ h.hasher.update(rem_data_elem);
+ } else {
+ // Put `rem_data_elem` back in `rem_data`
+ if (!in_pattern) {
+ rem_data = data[vec_elem_i..];
+ } else {
+ rem_splat += 1;
+ }
+ }
+ rem_data_elem = undefined; // it is always updated below
+
+ // Send through as much of the original vector as possible
+ var vec_n: usize = 0;
+ var vlimit = block_limit;
+ const vec_splat = while (rem_data[vec_n..].len != 1) {
+ vlimit = vlimit.subtract(rem_data[vec_n].len) orelse break 1;
+ vec_n += 1;
+ } else vec_splat: {
+ // For `pattern.len == 0`, the value of `vec_splat` does not matter.
+ const vec_splat = @intFromEnum(vlimit) / @max(1, pattern.len);
+ if (pattern.len != 0) assert(vec_splat <= rem_splat + 1);
+ vlimit = vlimit.subtract(pattern.len * vec_splat).?;
+ vec_n += 1;
+ break :vec_splat vec_splat;
+ };
+
+ const n = if (vec_n != 0) n: {
+ assert(@intFromEnum(block_limit) - @intFromEnum(vlimit) ==
+ Writer.countSplat(rem_data[0..vec_n], vec_splat));
+ break :n try h.bit_writer.output.writeSplat(rem_data[0..vec_n], vec_splat);
+ } else 0; // Still go into the case below to advance the vector
+ block_limit = block_limit.subtract(n).?;
+ var consumed: Io.Limit = .limited(n);
+
+ while (rem_data.len != 1) {
+ const elem = rem_data[0];
+ rem_data = rem_data[1..];
+ consumed = consumed.subtract(elem.len) orelse {
+ h.hasher.update(elem[0..@intFromEnum(consumed)]);
+ rem_data_elem = elem[@intFromEnum(consumed)..];
+ break;
+ };
+ h.hasher.update(elem);
+ } else {
+ if (pattern.len == 0) {
+ // All of `data` has been consumed. However, the general
+ // case below does not work since it divides by zero.
+ assert(consumed == .nothing);
+ assert(block_limit == .nothing);
+ assert(rem_bytes == 0);
+ // Since `rem_bytes` and `block_limit` are zero, these won't be used.
+ rem_splat = undefined;
+ rem_data = undefined;
+ rem_data_elem = undefined;
+ break;
+ }
+
+ const splatted = @intFromEnum(consumed) / pattern.len;
+ const partial = @intFromEnum(consumed) % pattern.len;
+ for (0..splatted) |_| h.hasher.update(pattern);
+ h.hasher.update(pattern[0..partial]);
+
+ const taken_splat = splatted + 1;
+ if (rem_splat >= taken_splat) {
+ rem_splat -= taken_splat;
+ rem_data_elem = pattern[partial..];
+ } else {
+ // All of `data` has been consumed.
+ assert(partial == 0);
+ assert(block_limit == .nothing);
+ assert(rem_bytes == 0);
+ // Since `rem_bytes` and `block_limit` are zero, these won't be used.
+ rem_data = undefined;
+ rem_data_elem = undefined;
+ rem_splat = undefined;
+ }
+ }
+
+ if (block_limit == .nothing) break;
+ }
+ }
+
+ if (rem_bytes > data_bytes) {
+ assert(rem_bytes - data_bytes == rem_data_elem.len);
+ assert(&rem_data_elem[0] == &w.buffer[total_bytes - rem_bytes]);
+ }
+ return w.consume(total_bytes - rem_bytes);
+ }
+
+ fn flush(w: *Writer) Writer.Error!void {
+ defer w.* = .failing;
+ const h: *Huffman = @fieldParentPtr("writer", w);
+ try Huffman.rebaseInner(w, 0, w.buffer.len, true);
+ try h.bit_writer.output.rebase(0, 1);
+ h.bit_writer.byteAlign();
+ try h.hasher.writeFooter(h.bit_writer.output);
+ }
+
+ fn rebase(w: *Writer, preserve: usize, capacity: usize) Writer.Error!void {
+ errdefer w.* = .failing;
+ try Huffman.rebaseInner(w, preserve, capacity, false);
+ }
+
+ fn rebaseInner(w: *Writer, preserve: usize, capacity: usize, eos: bool) Writer.Error!void {
+ const h: *Huffman = @fieldParentPtr("writer", w);
+ assert(preserve + capacity <= w.buffer.len);
+ if (eos) assert(capacity == w.buffer.len);
+
+ const preserved = @min(w.end, preserve);
+ var remaining = w.buffer[0 .. w.end - preserved];
+ while (remaining.len > max_tokens) { // not >= so there is always a block down below
+ const bytes = remaining[0..max_tokens];
+ remaining = remaining[max_tokens..];
+ try h.outputBytes(bytes, false);
+ }
+
+ // eos check required for empty block
+ if (w.buffer.len - (remaining.len + preserved) < capacity or eos) {
+ const bytes = remaining;
+ remaining = &.{};
+ try h.outputBytes(bytes, eos);
+ }
+
+ _ = w.consume(w.end - preserved - remaining.len);
+ }
+
+ fn outputBytes(h: *Huffman, bytes: []const u8, eos: bool) Writer.Error!void {
+ comptime assert(max_tokens != 65535);
+ assert(bytes.len <= max_tokens);
+ var freqs: [257]u16 = @splat(0);
+ freqs[256] = 1;
+ for (bytes) |b| freqs[b] += 1;
+ h.hasher.update(bytes);
+
+ var codes_buf: CodesBuf = .init;
+ if (try h.outputHeader(&freqs, &codes_buf, @intCast(bytes.len), eos)) |table| {
+ for (bytes) |b| {
+ try h.bit_writer.write(table.codes[b], table.bits[b]);
+ }
+ try h.bit_writer.write(table.codes[256], table.bits[256]);
+ } else {
+ try h.bit_writer.output.writeAll(bytes);
+ }
+ }
+
+ const CodesBuf = struct {
+ dyn_codes: [258]u16,
+ dyn_bits: [258]u4,
+
+ pub const init: CodesBuf = .{
+ .dyn_codes = @as([257]u16, undefined) ++ .{0},
+ .dyn_bits = @as([257]u4, @splat(0)) ++ .{1},
+ };
};
- enc.generate(freqs[0..], 7);
-
- try testing.expectEqual(@as(u32, 141), enc.bitLength(freqs[0..]));
-
- try testing.expectEqual(@as(usize, 3), enc.codes[0].len);
- try testing.expectEqual(@as(usize, 6), enc.codes[1].len);
- try testing.expectEqual(@as(usize, 6), enc.codes[2].len);
- try testing.expectEqual(@as(usize, 5), enc.codes[3].len);
- try testing.expectEqual(@as(usize, 3), enc.codes[4].len);
- try testing.expectEqual(@as(usize, 2), enc.codes[5].len);
- try testing.expectEqual(@as(usize, 2), enc.codes[6].len);
- try testing.expectEqual(@as(usize, 6), enc.codes[7].len);
- try testing.expectEqual(@as(usize, 0), enc.codes[8].len);
- try testing.expectEqual(@as(usize, 0), enc.codes[9].len);
- try testing.expectEqual(@as(usize, 0), enc.codes[10].len);
- try testing.expectEqual(@as(usize, 0), enc.codes[11].len);
- try testing.expectEqual(@as(usize, 0), enc.codes[12].len);
- try testing.expectEqual(@as(usize, 0), enc.codes[13].len);
- try testing.expectEqual(@as(usize, 0), enc.codes[14].len);
- try testing.expectEqual(@as(usize, 0), enc.codes[15].len);
- try testing.expectEqual(@as(usize, 6), enc.codes[16].len);
- try testing.expectEqual(@as(usize, 5), enc.codes[17].len);
- try testing.expectEqual(@as(usize, 3), enc.codes[18].len);
-
- try testing.expectEqual(@as(u16, 0x0), enc.codes[5].code);
- try testing.expectEqual(@as(u16, 0x2), enc.codes[6].code);
- try testing.expectEqual(@as(u16, 0x1), enc.codes[0].code);
- try testing.expectEqual(@as(u16, 0x5), enc.codes[4].code);
- try testing.expectEqual(@as(u16, 0x3), enc.codes[18].code);
- try testing.expectEqual(@as(u16, 0x7), enc.codes[3].code);
- try testing.expectEqual(@as(u16, 0x17), enc.codes[17].code);
- try testing.expectEqual(@as(u16, 0x0f), enc.codes[1].code);
- try testing.expectEqual(@as(u16, 0x2f), enc.codes[2].code);
- try testing.expectEqual(@as(u16, 0x1f), enc.codes[7].code);
- try testing.expectEqual(@as(u16, 0x3f), enc.codes[16].code);
+
+ /// Returns null if the block is stored.
+ fn outputHeader(
+ h: *Huffman,
+ freqs: *const [257]u16,
+ buf: *CodesBuf,
+ bytes: u16,
+ eos: bool,
+ ) Writer.Error!?struct {
+ codes: *const [257]u16,
+ bits: *const [257]u4,
+ } {
+ assert(freqs[256] == 1);
+ const dyn_codes_bitsize, _ = huffman.build(
+ freqs,
+ buf.dyn_codes[0..257],
+ buf.dyn_bits[0..257],
+ 15,
+ true,
+ );
+
+ var clen_values: [258]u8 = undefined;
+ var clen_extra: [258]u8 = undefined;
+ var clen_freqs: [19]u16 = @splat(0);
+ const clen_len, const clen_extra_bitsize = buildClen(
+ &buf.dyn_bits,
+ &clen_values,
+ &clen_extra,
+ &clen_freqs,
+ );
+
+ var clen_codes: [19]u16 = undefined;
+ var clen_bits: [19]u4 = @splat(0);
+ const clen_codes_bitsize, _ = huffman.build(
+ &clen_freqs,
+ &clen_codes,
+ &clen_bits,
+ 7,
+ false,
+ );
+ const hclen = clenHlen(clen_freqs);
+
+ const dynamic_bitsize = @as(u32, 14) +
+ (4 + @as(u6, hclen)) * 3 + clen_codes_bitsize + clen_extra_bitsize +
+ dyn_codes_bitsize;
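+        // Fixed-code estimate (RFC 1951 3.2.6): literals 0-143 cost 8 bits,
+        // 144-255 cost 9 bits, and the end-of-block symbol costs 7 bits.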
+ const fixed_bitsize = n: {
+ const freq7 = 1; // eos
+ var freq9: u16 = 0;
+ for (freqs[144..256]) |f| freq9 += f;
+ const freq8: u16 = bytes - freq9;
+ break :n @as(u32, freq7) * 7 + @as(u32, freq8) * 8 + @as(u32, freq9) * 9;
+ };
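+        // Stored-block estimate: bits to byte-align after the 3-bit block header,
+        // plus 32 bits of LEN/NLEN, plus the raw bytes. The 3 header bits themselves
+        // are common to all three estimates and omitted from each.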
+ const stored_bitsize = n: {
+ const stored_align_bits = -%(h.bit_writer.buffered_n +% 3);
+ break :n stored_align_bits + @as(u32, 32) + @as(u32, bytes) * 8;
+ };
+
+ //std.debug.print("@ {}{{{}}} ", .{ h.bit_writer.output.end, h.bit_writer.buffered_n });
+ //std.debug.print("#{} -> s {} f {} d {}\n", .{ bytes, stored_bitsize, fixed_bitsize, dynamic_bitsize });
+
+ if (stored_bitsize <= @min(dynamic_bitsize, fixed_bitsize)) {
+ try h.bit_writer.write(BlockHeader.int(.{ .kind = .stored, .final = eos }), 3);
+ try h.bit_writer.output.rebase(0, 5);
+ h.bit_writer.byteAlign();
+ h.bit_writer.output.writeInt(u16, bytes, .little) catch unreachable;
+ h.bit_writer.output.writeInt(u16, ~bytes, .little) catch unreachable;
+ return null;
+ }
+
+ if (fixed_bitsize <= dynamic_bitsize) {
+ try h.bit_writer.write(BlockHeader.int(.{ .final = eos, .kind = .fixed }), 3);
+ return .{
+ .codes = token.fixed_lit_codes[0..257],
+ .bits = token.fixed_lit_bits[0..257],
+ };
+ } else {
+ try h.bit_writer.write(BlockHeader.Dynamic.int(.{
+ .regular = .{ .final = eos, .kind = .dynamic },
+ .hlit = 0,
+ .hdist = 0,
+ .hclen = hclen,
+ }), 17);
+ try h.bit_writer.writeClen(
+ hclen,
+ clen_values[0..clen_len],
+ clen_extra[0..clen_len],
+ clen_codes,
+ clen_bits,
+ );
+ return .{ .codes = buf.dyn_codes[0..257], .bits = buf.dyn_bits[0..257] };
+ }
+ }
+};
+
+test Huffman {
+ const fbufs = try testingFreqBufs();
+ defer if (!builtin.fuzz) std.testing.allocator.destroy(fbufs);
+ try std.testing.fuzz(fbufs, testFuzzedHuffmanInput, .{});
+}
+
+/// This function is derived from `testFuzzedRawInput` with a few changes for fuzzing `Huffman`.
+fn testFuzzedHuffmanInput(fbufs: *const [2][65536]u8, input: []const u8) !void {
+ var in: Io.Reader = .fixed(input);
+ const opts: packed struct(u19) {
+ container: PackedContainer,
+ buf_len: u17,
+ } = @bitCast(in.takeLeb128(u19) catch 0);
+ var flate_buf: [2 * 65536]u8 = undefined;
+ var flate_w: Writer = .fixed(&flate_buf);
+ var h_buf: [2 * 65536]u8 = undefined;
+ var h: Huffman = try .init(
+ &flate_w,
+ h_buf[0 .. opts.buf_len +% flate.max_window_len],
+ opts.container.val(),
+ );
+
+ var expected_hash: flate.Container.Hasher = .init(opts.container.val());
+ var expected_size: u32 = 0;
+ var vecs: [32][]const u8 = undefined;
+ var vecs_n: usize = 0;
+
+ while (in.seek != in.end) {
+ const VecInfo = packed struct(u55) {
+ output: bool,
+            /// If set, `data_len` and `splat` are reinterpreted as `preserve_len`
+            /// and `capacity` respectively and `output` is treated as set.
+ rebase: bool,
+ block_aligning_len: bool,
+ block_aligning_splat: bool,
+ data_off_hi: u8,
+ random_data: u1,
+ data_len: u16,
+ splat: u18,
+ /// This is less useful as each value is part of the same gradient 'step'
+ data_off_lo: u8,
+ };
+ var vec_info: VecInfo = @bitCast(in.takeLeb128(u55) catch |e| switch (e) {
+ error.ReadFailed => unreachable,
+ error.Overflow, error.EndOfStream => 0,
+ });
+
+ {
+ const buffered = h.writer.buffered().len + countVec(vecs[0..vecs_n]);
+ const to_align = mem.alignForwardAnyAlign(usize, buffered, Huffman.max_tokens) - buffered;
+ assert((buffered + to_align) % Huffman.max_tokens == 0);
+
+ if (vec_info.block_aligning_len) {
+ vec_info.data_len = @intCast(to_align);
+ } else if (vec_info.block_aligning_splat and vec_info.data_len != 0 and
+ to_align % vec_info.data_len == 0)
+ {
+ vec_info.splat = @divExact(@as(u18, @intCast(to_align)), vec_info.data_len) -% 1;
+ }
+ }
+
+ var splat = if (vec_info.output and !vec_info.rebase) vec_info.splat +% 1 else 1;
+ add_vec: {
+ if (vec_info.rebase) break :add_vec;
+ if (expected_size +| math.mulWide(u18, vec_info.data_len, splat) > 4 * (1 << 16)) {
+ // Skip this vector to avoid this test taking too long.
+ splat = 1;
+ break :add_vec;
+ }
+
+ const data_buf = &fbufs[vec_info.random_data];
+ vecs[vecs_n] = data_buf[@min(
+ (@as(u16, vec_info.data_off_hi) << 8) | vec_info.data_off_lo,
+ data_buf.len - vec_info.data_len,
+ )..][0..vec_info.data_len];
+
+ for (0..splat) |_| expected_hash.update(vecs[vecs_n]);
+ expected_size += @as(u32, @intCast(vecs[vecs_n].len)) * splat;
+ vecs_n += 1;
+ }
+
+ const want_drain = vecs_n == vecs.len or vec_info.output or vec_info.rebase or
+ in.seek == in.end;
+ if (want_drain and vecs_n != 0) {
+            const old_end = flate_w.end;
+            var n = h.writer.buffered().len + Writer.countSplat(vecs[0..vecs_n], splat);
+            const oos = h.writer.writeSplatAll(vecs[0..vecs_n], splat) == error.WriteFailed;
+            n -= h.writer.buffered().len;
+            const block_lim = math.divCeil(usize, n, Huffman.max_tokens) catch unreachable;
+            const lim = old_end + 6 * block_lim + n; // 6 since block header may span two bytes
+ if (flate_w.end > lim) return error.OverheadTooLarge;
+ if (oos) return;
+
+ vecs_n = 0;
+ } else assert(splat == 1);
+
+ if (vec_info.rebase) {
+ const old_end = flate_w.end;
+ var n = h.writer.buffered().len;
+ const oos = h.writer.rebase(vec_info.data_len, @min(
+ h.writer.buffer.len -| vec_info.data_len,
+ vec_info.splat,
+ )) == error.WriteFailed;
+ n -= h.writer.buffered().len;
+ const block_lim = math.divCeil(usize, n, Huffman.max_tokens) catch unreachable;
+ const lim = old_end + 6 * block_lim + n; // 6 since block header may span two bytes
+ if (flate_w.end > lim) return error.OverheadTooLarge;
+ if (oos) return;
+ }
+ }
+
+ {
+ const old_end = flate_w.end;
+ const n = h.writer.buffered().len;
+ const oos = h.writer.flush() == error.WriteFailed;
+ assert(h.writer.buffered().len == 0);
+ const block_lim = @max(1, math.divCeil(usize, n, Huffman.max_tokens) catch unreachable);
+ const lim = old_end + 6 * block_lim + n + opts.container.val().footerSize();
+ if (flate_w.end > lim) return error.OverheadTooLarge;
+ if (oos) return;
+ }
+
+ try testingCheckDecompressedMatches(flate_w.buffered(), expected_size, expected_hash);
}
lib/std/compress/flate/Decompress.zig
@@ -7,11 +7,10 @@ const Reader = std.Io.Reader;
const Container = flate.Container;
const Decompress = @This();
-const Token = @import("Token.zig");
+const token = @import("token.zig");
input: *Reader,
-next_bits: Bits,
-remaining_bits: std.math.Log2Int(Bits),
+consumed_bits: u3,
reader: Reader,
@@ -25,8 +24,6 @@ state: State,
err: ?Error,
-const Bits = usize;
-
const BlockType = enum(u2) {
stored = 0,
fixed = 1,
@@ -39,6 +36,8 @@ const State = union(enum) {
block_header,
stored_block: u16,
fixed_block,
+ fixed_block_literal: u8,
+ fixed_block_match: u16,
dynamic_block,
dynamic_block_literal: u8,
dynamic_block_match: u16,
@@ -87,8 +86,7 @@ pub fn init(input: *Reader, container: Container, buffer: []u8) Decompress {
.end = 0,
},
.input = input,
- .next_bits = 0,
- .remaining_bits = 0,
+ .consumed_bits = 0,
.container_metadata = .init(container),
.lit_dec = .{},
.dst_dec = .{},
@@ -183,27 +181,25 @@ fn streamIndirectInner(d: *Decompress) Reader.Error!usize {
return 0;
}
-fn decodeLength(self: *Decompress, code: u8) !u16 {
- if (code > 28) return error.InvalidCode;
- const ml = Token.matchLength(code);
- return if (ml.extra_bits == 0) // 0 - 5 extra bits
- ml.base
- else
- ml.base + try self.takeBitsRuntime(ml.extra_bits);
+fn decodeLength(self: *Decompress, code_int: u5) !u16 {
+ if (code_int > 28) return error.InvalidCode;
+ const l: token.LenCode = .fromInt(code_int);
+ const base = l.base();
+ const extra = l.extraBits();
+ return token.min_length + (base | try self.takeBits(extra));
}
-fn decodeDistance(self: *Decompress, code: u8) !u16 {
- if (code > 29) return error.InvalidCode;
- const md = Token.matchDistance(code);
- return if (md.extra_bits == 0) // 0 - 13 extra bits
- md.base
- else
- md.base + try self.takeBitsRuntime(md.extra_bits);
+fn decodeDistance(self: *Decompress, code_int: u5) !u16 {
+ if (code_int > 29) return error.InvalidCode;
+ const d: token.DistCode = .fromInt(code_int);
+ const base = d.base();
+ const extra = d.extraBits();
+ return token.min_distance + (base | try self.takeBits(extra));
}
-// Decode code length symbol to code length. Writes decoded length into
-// lens slice starting at position pos. Returns number of positions
-// advanced.
+/// Decode code length symbol to code length. Writes decoded length into
+/// lens slice starting at position pos. Returns number of positions
+/// advanced.
fn dynamicCodeLength(self: *Decompress, code: u16, lens: []u4, pos: usize) !usize {
if (pos >= lens.len)
return error.InvalidDynamicBlockHeader;
@@ -217,7 +213,7 @@ fn dynamicCodeLength(self: *Decompress, code: u16, lens: []u4, pos: usize) !usiz
16 => {
// Copy the previous code length 3 - 6 times.
// The next 2 bits indicate repeat length
- const n: u8 = @as(u8, try self.takeBits(u2)) + 3;
+ const n: u8 = @as(u8, try self.takeIntBits(u2)) + 3;
if (pos == 0 or pos + n > lens.len)
return error.InvalidDynamicBlockHeader;
for (0..n) |i| {
@@ -226,17 +222,17 @@ fn dynamicCodeLength(self: *Decompress, code: u16, lens: []u4, pos: usize) !usiz
return n;
},
// Repeat a code length of 0 for 3 - 10 times. (3 bits of length)
- 17 => return @as(u8, try self.takeBits(u3)) + 3,
+ 17 => return @as(u8, try self.takeIntBits(u3)) + 3,
// Repeat a code length of 0 for 11 - 138 times (7 bits of length)
- 18 => return @as(u8, try self.takeBits(u7)) + 11,
+ 18 => return @as(u8, try self.takeIntBits(u7)) + 11,
else => return error.InvalidDynamicBlockHeader,
}
}
fn decodeSymbol(self: *Decompress, decoder: anytype) !Symbol {
// Maximum code len is 15 bits.
- const sym = try decoder.find(@bitReverse(try self.peekBits(u15)));
- try self.tossBits(sym.code_bits);
+ const sym = try decoder.find(@bitReverse(try self.peekIntBitsShort(u15)));
+ try self.tossBitsShort(sym.code_bits);
return sym;
}
@@ -320,11 +316,11 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
.raw => continue :sw .block_header,
},
.block_header => {
- d.final_block = (try d.takeBits(u1)) != 0;
- const block_type: BlockType = @enumFromInt(try d.takeBits(u2));
+ d.final_block = (try d.takeIntBits(u1)) != 0;
+ const block_type: BlockType = @enumFromInt(try d.takeIntBits(u2));
switch (block_type) {
.stored => {
- d.alignBitsDiscarding();
+ d.alignBitsForward();
// everything after this is byte aligned in stored block
const len = try in.takeInt(u16, .little);
const nlen = try in.takeInt(u16, .little);
@@ -333,17 +329,17 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
},
.fixed => continue :sw .fixed_block,
.dynamic => {
- const hlit: u16 = @as(u16, try d.takeBits(u5)) + 257; // number of ll code entries present - 257
- const hdist: u16 = @as(u16, try d.takeBits(u5)) + 1; // number of distance code entries - 1
- const hclen: u8 = @as(u8, try d.takeBits(u4)) + 4; // hclen + 4 code lengths are encoded
+ const hlit: u16 = @as(u16, try d.takeIntBits(u5)) + 257; // number of ll code entries present - 257
+ const hdist: u16 = @as(u16, try d.takeIntBits(u5)) + 1; // number of distance code entries - 1
+ const hclen: u8 = @as(u8, try d.takeIntBits(u4)) + 4; // hclen + 4 code lengths are encoded
if (hlit > 286 or hdist > 30)
return error.InvalidDynamicBlockHeader;
// lengths for code lengths
var cl_lens: [19]u4 = @splat(0);
- for (flate.HuffmanEncoder.codegen_order[0..hclen]) |i| {
- cl_lens[i] = try d.takeBits(u3);
+ for (token.codegen_order[0..hclen]) |i| {
+ cl_lens[i] = try d.takeIntBits(u3);
}
var cl_dec: CodegenDecoder = .{};
try cl_dec.generate(&cl_lens);
@@ -352,9 +348,9 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
var dec_lens: [286 + 30]u4 = @splat(0);
var pos: usize = 0;
while (pos < hlit + hdist) {
- const peeked = @bitReverse(try d.peekBits(u7));
+ const peeked = @bitReverse(try d.peekIntBitsShort(u7));
const sym = try cl_dec.find(peeked);
- try d.tossBits(sym.code_bits);
+ try d.tossBitsShort(sym.code_bits);
pos += try d.dynamicCodeLength(sym.symbol, &dec_lens, pos);
}
if (pos > hlit + hdist) {
@@ -373,9 +369,12 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
}
},
.stored_block => |remaining_len| {
- const out = try w.writableSliceGreedyPreserve(flate.history_len, 1);
+ const out: []u8 = if (remaining != 0)
+ try w.writableSliceGreedyPreserve(flate.history_len, 1)
+ else
+ &.{};
var limited_out: [1][]u8 = .{limit.min(.limited(remaining_len)).slice(out)};
- const n = try d.input.readVec(&limited_out);
+ const n = try in.readVec(&limited_out);
if (remaining_len - n == 0) {
d.state = if (d.final_block) .protocol_footer else .block_header;
} else {
@@ -389,8 +388,14 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
const code = try d.readFixedCode();
switch (code) {
0...255 => {
- try w.writeBytePreserve(flate.history_len, @intCast(code));
- remaining -= 1;
+ if (remaining != 0) {
+ @branchHint(.likely);
+ try w.writeBytePreserve(flate.history_len, @intCast(code));
+ remaining -= 1;
+ } else {
+ d.state = .{ .fixed_block_literal = @intCast(code) };
+ return @intFromEnum(limit) - remaining;
+ }
},
256 => {
d.state = if (d.final_block) .protocol_footer else .block_header;
@@ -400,9 +405,7 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
// Handles fixed block non literal (length) code.
// Length code is followed by 5 bits of distance code.
const length = try d.decodeLength(@intCast(code - 257));
- const distance = try d.decodeDistance(@bitReverse(try d.takeBits(u5)));
- try writeMatch(w, length, distance);
- remaining -= length;
+ continue :sw .{ .fixed_block_match = length };
},
else => return error.InvalidCode,
}
@@ -410,6 +413,24 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
d.state = .fixed_block;
return @intFromEnum(limit) - remaining;
},
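+ // The two states below park a decoded symbol when the output limit is reached
+ // mid-block: the pending literal or match length is stored in the state and
+ // replayed on the next call, once the caller provides more output budget.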
+ .fixed_block_literal => |symbol| {
+ assert(remaining != 0);
+ remaining -= 1;
+ try w.writeBytePreserve(flate.history_len, symbol);
+ continue :sw .fixed_block;
+ },
+ .fixed_block_match => |length| {
+ if (remaining >= length) {
+ @branchHint(.likely);
+ const distance = try d.decodeDistance(@bitReverse(try d.takeIntBits(u5)));
+ try writeMatch(w, length, distance);
+ remaining -= length;
+ continue :sw .fixed_block;
+ } else {
+ d.state = .{ .fixed_block_match = length };
+ return @intFromEnum(limit) - remaining;
+ }
+ },
.dynamic_block => {
// In larger archives most blocks are usually dynamic, so
// decompression performance depends on this logic.
@@ -429,7 +450,7 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
},
.match => {
// Decode match backreference <length, distance>
- const length = try d.decodeLength(sym.symbol);
+ const length = try d.decodeLength(@intCast(sym.symbol));
continue :sw .{ .dynamic_block_match = length };
},
.end_of_block => {
@@ -449,7 +470,7 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
@branchHint(.likely);
remaining -= length;
const dsm = try d.decodeSymbol(&d.dst_dec);
- const distance = try d.decodeDistance(dsm.symbol);
+ const distance = try d.decodeDistance(@intCast(dsm.symbol));
try writeMatch(w, length, distance);
continue :sw .dynamic_block;
} else {
@@ -458,23 +479,16 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
}
},
.protocol_footer => {
+ d.alignBitsForward();
switch (d.container_metadata) {
.gzip => |*gzip| {
- d.alignBitsDiscarding();
- gzip.* = .{
- .crc = try in.takeInt(u32, .little),
- .count = try in.takeInt(u32, .little),
- };
+ gzip.crc = try in.takeInt(u32, .little);
+ gzip.count = try in.takeInt(u32, .little);
},
.zlib => |*zlib| {
- d.alignBitsDiscarding();
- zlib.* = .{
- .adler = try in.takeInt(u32, .little),
- };
- },
- .raw => {
- d.alignBitsPreserving();
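+ // The zlib footer stores the Adler-32 checksum big-endian (RFC 1950), unlike
+ // the little-endian fields of the gzip trailer.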
+ zlib.adler = try in.takeInt(u32, .big);
},
+ .raw => {},
}
d.state = .end;
return @intFromEnum(limit) - remaining;
@@ -487,10 +501,10 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
/// back from current write position, and `length` of bytes.
fn writeMatch(w: *Writer, length: u16, distance: u16) !void {
if (w.end < distance) return error.InvalidMatch;
- if (length < Token.base_length) return error.InvalidMatch;
- if (length > Token.max_length) return error.InvalidMatch;
- if (distance < Token.min_distance) return error.InvalidMatch;
- if (distance > Token.max_distance) return error.InvalidMatch;
+ if (length < token.min_length) return error.InvalidMatch;
+ if (length > token.max_length) return error.InvalidMatch;
+ if (distance < token.min_distance) return error.InvalidMatch;
+ if (distance > token.max_distance) return error.InvalidMatch;
// This is not a @memmove; it intentionally repeats patterns caused by
// iterating one byte at a time.
@@ -500,137 +514,71 @@ fn writeMatch(w: *Writer, length: u16, distance: u16) !void {
for (dest, src) |*d, s| d.* = s;
}
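+// The bit reader below keeps no separate bit buffer: `consumed_bits` records how
+// many bits of the first buffered input byte have already been used, and peeks
+// read a little-endian u32 directly from the reader's buffer.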
-fn takeBits(d: *Decompress, comptime U: type) !U {
- const remaining_bits = d.remaining_bits;
- const next_bits = d.next_bits;
- if (remaining_bits >= @bitSizeOf(U)) {
- const u: U = @truncate(next_bits);
- d.next_bits = next_bits >> @bitSizeOf(U);
- d.remaining_bits = remaining_bits - @bitSizeOf(U);
- return u;
- }
- const in = d.input;
- const next_int = in.takeInt(Bits, .little) catch |err| switch (err) {
- error.ReadFailed => return error.ReadFailed,
- error.EndOfStream => return takeBitsEnding(d, U),
+fn peekBits(d: *Decompress, n: u4) !u16 {
+ const bits = d.input.peekInt(u32, .little) catch |e| return switch (e) {
+ error.ReadFailed => error.ReadFailed,
+ error.EndOfStream => d.peekBitsEnding(n),
};
- const needed_bits = @bitSizeOf(U) - remaining_bits;
- const u: U = @intCast(((next_int & ((@as(Bits, 1) << needed_bits) - 1)) << remaining_bits) | next_bits);
- d.next_bits = next_int >> needed_bits;
- d.remaining_bits = @intCast(@bitSizeOf(Bits) - @as(usize, needed_bits));
- return u;
+ const mask = @shlExact(@as(u16, 1), n) - 1;
+ return @intCast((bits >> d.consumed_bits) & mask);
}
-fn takeBitsEnding(d: *Decompress, comptime U: type) !U {
- const remaining_bits = d.remaining_bits;
- const next_bits = d.next_bits;
- const in = d.input;
- const n = in.bufferedLen();
- assert(n < @sizeOf(Bits));
- const needed_bits = @bitSizeOf(U) - remaining_bits;
- if (n * 8 < needed_bits) return error.EndOfStream;
- const next_int = in.takeVarInt(Bits, .little, n) catch |err| switch (err) {
- error.ReadFailed => return error.ReadFailed,
- error.EndOfStream => unreachable,
- };
- const u: U = @intCast(((next_int & ((@as(Bits, 1) << needed_bits) - 1)) << remaining_bits) | next_bits);
- d.next_bits = next_int >> needed_bits;
- d.remaining_bits = @intCast(n * 8 - @as(usize, needed_bits));
- return u;
+fn peekBitsEnding(d: *Decompress, n: u4) !u16 {
+ @branchHint(.unlikely);
+
+ const left = d.input.buffered();
+ if (left.len * 8 - d.consumed_bits < n) return error.EndOfStream;
+ const bits = std.mem.readVarInt(u32, left, .little);
+ const mask = @shlExact(@as(u16, 1), n) - 1;
+ return @intCast((bits >> d.consumed_bits) & mask);
}
-fn peekBits(d: *Decompress, comptime U: type) !U {
- const remaining_bits = d.remaining_bits;
- const next_bits = d.next_bits;
- if (remaining_bits >= @bitSizeOf(U)) return @truncate(next_bits);
- const in = d.input;
- const next_int = in.peekInt(Bits, .little) catch |err| switch (err) {
- error.ReadFailed => return error.ReadFailed,
- error.EndOfStream => return peekBitsEnding(d, U),
- };
- const needed_bits = @bitSizeOf(U) - remaining_bits;
- return @intCast(((next_int & ((@as(Bits, 1) << needed_bits) - 1)) << remaining_bits) | next_bits);
+/// Safe only after `peekBits` has been called with a greater or equal `n` value.
+fn tossBits(d: *Decompress, n: u4) void {
+ d.input.toss((@as(u8, n) + d.consumed_bits) / 8);
+ d.consumed_bits +%= @truncate(n);
}
-fn peekBitsEnding(d: *Decompress, comptime U: type) !U {
- const remaining_bits = d.remaining_bits;
- const next_bits = d.next_bits;
- const in = d.input;
- var u: Bits = 0;
- var remaining_needed_bits = @bitSizeOf(U) - remaining_bits;
- var i: usize = 0;
- while (remaining_needed_bits > 0) {
- const peeked = in.peek(i + 1) catch |err| switch (err) {
- error.ReadFailed => return error.ReadFailed,
- error.EndOfStream => break,
- };
- u |= @as(Bits, peeked[i]) << @intCast(i * 8);
- remaining_needed_bits -|= 8;
- i += 1;
- }
- if (remaining_bits == 0 and i == 0) return error.EndOfStream;
- return @truncate((u << remaining_bits) | next_bits);
-}
-
-fn tossBits(d: *Decompress, n: u4) !void {
- const remaining_bits = d.remaining_bits;
- const next_bits = d.next_bits;
- if (remaining_bits >= n) {
- d.next_bits = next_bits >> n;
- d.remaining_bits = remaining_bits - n;
- } else {
- const in = d.input;
- const next_int = in.takeInt(Bits, .little) catch |err| switch (err) {
- error.ReadFailed => return error.ReadFailed,
- error.EndOfStream => return tossBitsEnding(d, n),
- };
- const needed_bits = n - remaining_bits;
- d.next_bits = next_int >> needed_bits;
- d.remaining_bits = @intCast(@bitSizeOf(Bits) - @as(usize, needed_bits));
- }
+fn takeBits(d: *Decompress, n: u4) !u16 {
+ const bits = try d.peekBits(n);
+ d.tossBits(n);
+ return bits;
}
-fn tossBitsEnding(d: *Decompress, n: u4) !void {
- const remaining_bits = d.remaining_bits;
- const in = d.input;
- const buffered_n = in.bufferedLen();
- if (buffered_n == 0) return error.EndOfStream;
- assert(buffered_n < @sizeOf(Bits));
- const needed_bits = n - remaining_bits;
- const next_int = in.takeVarInt(Bits, .little, buffered_n) catch |err| switch (err) {
- error.ReadFailed => return error.ReadFailed,
- error.EndOfStream => unreachable,
+fn alignBitsForward(d: *Decompress) void {
+ d.input.toss(@intFromBool(d.consumed_bits != 0));
+ d.consumed_bits = 0;
+}
+
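+// The `Short` peek variants are used for Huffman code lookups: they may peek past
+// the end of the stream, since the decoded symbol can need fewer bits than were
+// peeked, so the end-of-stream check is deferred to `tossBitsShort`.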
+fn peekBitsShort(d: *Decompress, n: u4) !u16 {
+ const bits = d.input.peekInt(u32, .little) catch |e| return switch (e) {
+ error.ReadFailed => error.ReadFailed,
+ error.EndOfStream => d.peekBitsShortEnding(n),
};
- d.next_bits = next_int >> needed_bits;
- d.remaining_bits = @intCast(@as(usize, buffered_n) * 8 -| @as(usize, needed_bits));
+ const mask = @shlExact(@as(u16, 1), n) - 1;
+ return @intCast((bits >> d.consumed_bits) & mask);
}
-fn takeBitsRuntime(d: *Decompress, n: u4) !u16 {
- const x = try peekBits(d, u16);
- const mask: u16 = (@as(u16, 1) << n) - 1;
- const u: u16 = @as(u16, @truncate(x)) & mask;
- try tossBits(d, n);
- return u;
+fn peekBitsShortEnding(d: *Decompress, n: u4) !u16 {
+ @branchHint(.unlikely);
+
+ const left = d.input.buffered();
+ const bits = std.mem.readVarInt(u32, left, .little);
+ const mask = @shlExact(@as(u16, 1), n) - 1;
+ return @intCast((bits >> d.consumed_bits) & mask);
}
-fn alignBitsDiscarding(d: *Decompress) void {
- const remaining_bits = d.remaining_bits;
- if (remaining_bits == 0) return;
- const n_bytes = remaining_bits / 8;
- const in = d.input;
- in.seek -= n_bytes;
- d.remaining_bits = 0;
- d.next_bits = 0;
+fn tossBitsShort(d: *Decompress, n: u4) !void {
+ if (d.input.bufferedLen() * 8 + d.consumed_bits < n) return error.EndOfStream;
+ d.tossBits(n);
}
-fn alignBitsPreserving(d: *Decompress) void {
- const remaining_bits: usize = d.remaining_bits;
- if (remaining_bits == 0) return;
- const n_bytes = (remaining_bits + 7) / 8;
- const in = d.input;
- in.seek -= n_bytes;
- d.remaining_bits = 0;
- d.next_bits = 0;
+fn takeIntBits(d: *Decompress, T: type) !T {
+ return @intCast(try d.takeBits(@bitSizeOf(T)));
+}
+
+fn peekIntBitsShort(d: *Decompress, T: type) !T {
+ return @intCast(try d.peekBitsShort(@bitSizeOf(T)));
}
/// Reads first 7 bits, and then maybe 1 or 2 more to get full 7,8 or 9 bit code.
@@ -646,12 +594,12 @@ fn alignBitsPreserving(d: *Decompress) void {
/// 280 - 287 8 11000000 through
/// 11000111
fn readFixedCode(d: *Decompress) !u16 {
- const code7 = @bitReverse(try d.takeBits(u7));
+ const code7 = @bitReverse(try d.takeIntBits(u7));
return switch (code7) {
0...0b0010_111 => @as(u16, code7) + 256,
- 0b0010_111 + 1...0b1011_111 => (@as(u16, code7) << 1) + @as(u16, try d.takeBits(u1)) - 0b0011_0000,
- 0b1011_111 + 1...0b1100_011 => (@as(u16, code7 - 0b1100000) << 1) + try d.takeBits(u1) + 280,
- else => (@as(u16, code7 - 0b1100_100) << 2) + @as(u16, @bitReverse(try d.takeBits(u2))) + 144,
+ 0b0010_111 + 1...0b1011_111 => (@as(u16, code7) << 1) + @as(u16, try d.takeIntBits(u1)) - 0b0011_0000,
+ 0b1011_111 + 1...0b1100_011 => (@as(u16, code7 - 0b1100000) << 1) + try d.takeIntBits(u1) + 280,
+ else => (@as(u16, code7 - 0b1100_100) << 2) + @as(u16, @bitReverse(try d.takeIntBits(u2))) + 144,
};
}
@@ -807,7 +755,7 @@ fn HuffmanDecoder(
return self.findLinked(code, sym.next);
}
- inline fn findLinked(self: *Self, code: u16, start: u16) !Symbol {
+ fn findLinked(self: *Self, code: u16, start: u16) !Symbol {
var pos = start;
while (pos > 0) {
const sym = self.symbols[pos];
@@ -898,57 +846,30 @@ test "init/find" {
}
test "encode/decode literals" {
- var codes: [flate.HuffmanEncoder.max_num_frequencies]flate.HuffmanEncoder.Code = undefined;
- for (1..286) |j| { // for all different number of codes
- var enc: flate.HuffmanEncoder = .{
- .codes = &codes,
- .freq_cache = undefined,
- .bit_count = undefined,
- .lns = undefined,
- .lfs = undefined,
- };
- // create frequencies
- var freq = [_]u16{0} ** 286;
- freq[256] = 1; // ensure we have end of block code
- for (&freq, 1..) |*f, i| {
- if (i % j == 0)
- f.* = @intCast(i);
- }
-
- // encoder from frequencies
- enc.generate(&freq, 15);
-
- // get code_lens from encoder
- var code_lens = [_]u4{0} ** 286;
- for (code_lens, 0..) |_, i| {
- code_lens[i] = @intCast(enc.codes[i].len);
- }
- // generate decoder from code lens
- var dec: LiteralDecoder = .{};
- try dec.generate(&code_lens);
-
- // expect decoder code to match original encoder code
- for (dec.symbols) |s| {
- if (s.code_bits == 0) continue;
- const c_code: u16 = @bitReverse(@as(u15, @intCast(s.code)));
- const symbol: u16 = switch (s.kind) {
- .literal => s.symbol,
- .end_of_block => 256,
- .match => @as(u16, s.symbol) + 257,
- };
-
- const c = enc.codes[symbol];
- try testing.expect(c.code == c_code);
- }
-
- // find each symbol by code
- for (enc.codes) |c| {
- if (c.len == 0) continue;
-
- const s_code: u15 = @bitReverse(@as(u15, @intCast(c.code)));
- const s = try dec.find(s_code);
- try testing.expect(s.code == s_code);
- try testing.expect(s.code_bits == c.len);
+ // Check that the example in RFC 1951 section 3.2.2 works (plus some zeroes)
+ const max_bits = 5;
+ var decoder: HuffmanDecoder(16, max_bits, 3) = .{};
+ try decoder.generate(&.{ 3, 3, 3, 3, 0, 0, 3, 2, 4, 4 });
+
+ inline for (0.., .{
+ @as(u3, 0b010),
+ @as(u3, 0b011),
+ @as(u3, 0b100),
+ @as(u3, 0b101),
+ @as(u0, 0),
+ @as(u0, 0),
+ @as(u3, 0b110),
+ @as(u2, 0b00),
+ @as(u4, 0b1110),
+ @as(u4, 0b1111),
+ }) |i, code| {
+ const bits = @bitSizeOf(@TypeOf(code));
+ if (bits == 0) continue;
+ for (0..1 << (max_bits - bits)) |extra| {
+ const full = (@as(u16, code) << (max_bits - bits)) | @as(u16, @intCast(extra));
+ const symbol = try decoder.find(full);
+ try testing.expectEqual(i, symbol.symbol);
+ try testing.expectEqual(bits, symbol.code_bits);
}
}
}
lib/std/compress/flate/HuffmanEncoder.zig
@@ -1,463 +0,0 @@
-const HuffmanEncoder = @This();
-const std = @import("std");
-const assert = std.debug.assert;
-const testing = std.testing;
-
-codes: []Code,
-// Reusable buffer with the longest possible frequency table.
-freq_cache: [max_num_frequencies + 1]LiteralNode,
-bit_count: [17]u32,
-lns: []LiteralNode, // sorted by literal, stored to avoid repeated allocation in generate
-lfs: []LiteralNode, // sorted by frequency, stored to avoid repeated allocation in generate
-
-pub const LiteralNode = struct {
- literal: u16,
- freq: u16,
-
- pub fn max() LiteralNode {
- return .{
- .literal = std.math.maxInt(u16),
- .freq = std.math.maxInt(u16),
- };
- }
-};
-
-pub const Code = struct {
- code: u16 = 0,
- len: u16 = 0,
-};
-
-/// The odd order in which the codegen code sizes are written.
-pub const codegen_order = [_]u32{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
-/// The number of codegen codes.
-pub const codegen_code_count = 19;
-
-/// The largest distance code.
-pub const distance_code_count = 30;
-
-/// Maximum number of literals.
-pub const max_num_lit = 286;
-
-/// Max number of frequencies used for a Huffman Code
-/// Possible lengths are codegen_code_count (19), distance_code_count (30) and max_num_lit (286).
-/// The largest of these is max_num_lit.
-pub const max_num_frequencies = max_num_lit;
-
-/// Biggest block size for uncompressed block.
-pub const max_store_block_size = 65535;
-/// The special code used to mark the end of a block.
-pub const end_block_marker = 256;
-
-/// Update this Huffman Code object to be the minimum code for the specified frequency count.
-///
-/// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
-/// max_bits The maximum number of bits to use for any literal.
-pub fn generate(self: *HuffmanEncoder, freq: []u16, max_bits: u32) void {
- var list = self.freq_cache[0 .. freq.len + 1];
- // Number of non-zero literals
- var count: u32 = 0;
- // Set list to be the set of all non-zero literals and their frequencies
- for (freq, 0..) |f, i| {
- if (f != 0) {
- list[count] = LiteralNode{ .literal = @as(u16, @intCast(i)), .freq = f };
- count += 1;
- } else {
- list[count] = LiteralNode{ .literal = 0x00, .freq = 0 };
- self.codes[i].len = 0;
- }
- }
- list[freq.len] = LiteralNode{ .literal = 0x00, .freq = 0 };
-
- list = list[0..count];
- if (count <= 2) {
- // Handle the small cases here, because they are awkward for the general case code. With
- // two or fewer literals, everything has bit length 1.
- for (list, 0..) |node, i| {
- // "list" is in order of increasing literal value.
- self.codes[node.literal] = .{
- .code = @intCast(i),
- .len = 1,
- };
- }
- return;
- }
- self.lfs = list;
- std.mem.sort(LiteralNode, self.lfs, {}, byFreq);
-
- // Get the number of literals for each bit count
- const bit_count = self.bitCounts(list, max_bits);
- // And do the assignment
- self.assignEncodingAndSize(bit_count, list);
-}
-
-pub fn bitLength(self: *HuffmanEncoder, freq: []u16) u32 {
- var total: u32 = 0;
- for (freq, 0..) |f, i| {
- if (f != 0) {
- total += @as(u32, @intCast(f)) * @as(u32, @intCast(self.codes[i].len));
- }
- }
- return total;
-}
-
-/// Return the number of literals assigned to each bit size in the Huffman encoding
-///
-/// This method is only called when list.len >= 3
-/// The cases of 0, 1, and 2 literals are handled by special case code.
-///
-/// list: An array of the literals with non-zero frequencies
-/// and their associated frequencies. The array is in order of increasing
-/// frequency, and has as its last element a special element with frequency
-/// `math.maxInt(i32)`
-///
-/// max_bits: The maximum number of bits that should be used to encode any literal.
-/// Must be less than 16.
-///
-/// Returns an integer array in which array[i] indicates the number of literals
-/// that should be encoded in i bits.
-fn bitCounts(self: *HuffmanEncoder, list: []LiteralNode, max_bits_to_use: usize) []u32 {
- var max_bits = max_bits_to_use;
- const n = list.len;
- const max_bits_limit = 16;
-
- assert(max_bits < max_bits_limit);
-
- // The tree can't have greater depth than n - 1, no matter what. This
- // saves a little bit of work in some small cases
- max_bits = @min(max_bits, n - 1);
-
- // Create information about each of the levels.
- // A bogus "Level 0" whose sole purpose is so that
- // level1.prev.needed == 0. This makes level1.next_pair_freq
- // be a legitimate value that never gets chosen.
- var levels: [max_bits_limit]LevelInfo = std.mem.zeroes([max_bits_limit]LevelInfo);
- // leaf_counts[i] counts the number of literals at the left
- // of ancestors of the rightmost node at level i.
- // leaf_counts[i][j] is the number of literals at the left
- // of the level j ancestor.
- var leaf_counts: [max_bits_limit][max_bits_limit]u32 = @splat(@splat(0));
-
- {
- var level = @as(u32, 1);
- while (level <= max_bits) : (level += 1) {
- // For every level, the first two items are the first two characters.
- // We initialize the levels as if we had already figured this out.
- levels[level] = LevelInfo{
- .level = level,
- .last_freq = list[1].freq,
- .next_char_freq = list[2].freq,
- .next_pair_freq = list[0].freq + list[1].freq,
- .needed = 0,
- };
- leaf_counts[level][level] = 2;
- if (level == 1) {
- levels[level].next_pair_freq = std.math.maxInt(i32);
- }
- }
- }
-
- // We need a total of 2*n - 2 items at top level and have already generated 2.
- levels[max_bits].needed = 2 * @as(u32, @intCast(n)) - 4;
-
- {
- var level = max_bits;
- while (true) {
- var l = &levels[level];
- if (l.next_pair_freq == std.math.maxInt(i32) and l.next_char_freq == std.math.maxInt(i32)) {
- // We've run out of both leaves and pairs.
- // End all calculations for this level.
- // To make sure we never come back to this level or any lower level,
- // set next_pair_freq impossibly large.
- l.needed = 0;
- levels[level + 1].next_pair_freq = std.math.maxInt(i32);
- level += 1;
- continue;
- }
-
- const prev_freq = l.last_freq;
- if (l.next_char_freq < l.next_pair_freq) {
- // The next item on this row is a leaf node.
- const next = leaf_counts[level][level] + 1;
- l.last_freq = l.next_char_freq;
- // Lower leaf_counts are the same of the previous node.
- leaf_counts[level][level] = next;
- if (next >= list.len) {
- l.next_char_freq = LiteralNode.max().freq;
- } else {
- l.next_char_freq = list[next].freq;
- }
- } else {
- // The next item on this row is a pair from the previous row.
- // next_pair_freq isn't valid until we generate two
- // more values in the level below
- l.last_freq = l.next_pair_freq;
- // Take leaf counts from the lower level, except counts[level] remains the same.
- @memcpy(leaf_counts[level][0..level], leaf_counts[level - 1][0..level]);
- levels[l.level - 1].needed = 2;
- }
-
- l.needed -= 1;
- if (l.needed == 0) {
- // We've done everything we need to do for this level.
- // Continue calculating one level up. Fill in next_pair_freq
- // of that level with the sum of the two nodes we've just calculated on
- // this level.
- if (l.level == max_bits) {
- // All done!
- break;
- }
- levels[l.level + 1].next_pair_freq = prev_freq + l.last_freq;
- level += 1;
- } else {
- // If we stole from below, move down temporarily to replenish it.
- while (levels[level - 1].needed > 0) {
- level -= 1;
- if (level == 0) {
- break;
- }
- }
- }
- }
- }
-
- // Somethings is wrong if at the end, the top level is null or hasn't used
- // all of the leaves.
- assert(leaf_counts[max_bits][max_bits] == n);
-
- var bit_count = self.bit_count[0 .. max_bits + 1];
- var bits: u32 = 1;
- const counts = &leaf_counts[max_bits];
- {
- var level = max_bits;
- while (level > 0) : (level -= 1) {
- // counts[level] gives the number of literals requiring at least "bits"
- // bits to encode.
- bit_count[bits] = counts[level] - counts[level - 1];
- bits += 1;
- if (level == 0) {
- break;
- }
- }
- }
- return bit_count;
-}
-
-/// Look at the leaves and assign them a bit count and an encoding as specified
-/// in RFC 1951 3.2.2
-fn assignEncodingAndSize(self: *HuffmanEncoder, bit_count: []u32, list_arg: []LiteralNode) void {
- var code = @as(u16, 0);
- var list = list_arg;
-
- for (bit_count, 0..) |bits, n| {
- code <<= 1;
- if (n == 0 or bits == 0) {
- continue;
- }
- // The literals list[list.len-bits] .. list[list.len-bits]
- // are encoded using "bits" bits, and get the values
- // code, code + 1, .... The code values are
- // assigned in literal order (not frequency order).
- const chunk = list[list.len - @as(u32, @intCast(bits)) ..];
-
- self.lns = chunk;
- std.mem.sort(LiteralNode, self.lns, {}, byLiteral);
-
- for (chunk) |node| {
- self.codes[node.literal] = .{
- .code = bitReverse(u16, code, @as(u5, @intCast(n))),
- .len = @as(u16, @intCast(n)),
- };
- code += 1;
- }
- list = list[0 .. list.len - @as(u32, @intCast(bits))];
- }
-}
-
-fn byFreq(context: void, a: LiteralNode, b: LiteralNode) bool {
- _ = context;
- if (a.freq == b.freq) {
- return a.literal < b.literal;
- }
- return a.freq < b.freq;
-}
-
-/// Describes the state of the constructed tree for a given depth.
-const LevelInfo = struct {
- /// Our level. for better printing
- level: u32,
- /// The frequency of the last node at this level
- last_freq: u32,
- /// The frequency of the next character to add to this level
- next_char_freq: u32,
- /// The frequency of the next pair (from level below) to add to this level.
- /// Only valid if the "needed" value of the next lower level is 0.
- next_pair_freq: u32,
- /// The number of chains remaining to generate for this level before moving
- /// up to the next level
- needed: u32,
-};
-
-fn byLiteral(context: void, a: LiteralNode, b: LiteralNode) bool {
- _ = context;
- return a.literal < b.literal;
-}
-
-/// Reverse bit-by-bit a N-bit code.
-fn bitReverse(comptime T: type, value: T, n: usize) T {
- const r = @bitReverse(value);
- return r >> @as(std.math.Log2Int(T), @intCast(@typeInfo(T).int.bits - n));
-}
-
-test bitReverse {
- const ReverseBitsTest = struct {
- in: u16,
- bit_count: u5,
- out: u16,
- };
-
- const reverse_bits_tests = [_]ReverseBitsTest{
- .{ .in = 1, .bit_count = 1, .out = 1 },
- .{ .in = 1, .bit_count = 2, .out = 2 },
- .{ .in = 1, .bit_count = 3, .out = 4 },
- .{ .in = 1, .bit_count = 4, .out = 8 },
- .{ .in = 1, .bit_count = 5, .out = 16 },
- .{ .in = 17, .bit_count = 5, .out = 17 },
- .{ .in = 257, .bit_count = 9, .out = 257 },
- .{ .in = 29, .bit_count = 5, .out = 23 },
- };
-
- for (reverse_bits_tests) |h| {
- const v = bitReverse(u16, h.in, h.bit_count);
- try std.testing.expectEqual(h.out, v);
- }
-}
-
-/// Generates a HuffmanCode corresponding to the fixed literal table
-pub fn fixedLiteralEncoder(codes: *[max_num_frequencies]Code) HuffmanEncoder {
- var h: HuffmanEncoder = undefined;
- h.codes = codes;
- var ch: u16 = 0;
-
- while (ch < max_num_frequencies) : (ch += 1) {
- var bits: u16 = undefined;
- var size: u16 = undefined;
- switch (ch) {
- 0...143 => {
- // size 8, 000110000 .. 10111111
- bits = ch + 48;
- size = 8;
- },
- 144...255 => {
- // size 9, 110010000 .. 111111111
- bits = ch + 400 - 144;
- size = 9;
- },
- 256...279 => {
- // size 7, 0000000 .. 0010111
- bits = ch - 256;
- size = 7;
- },
- else => {
- // size 8, 11000000 .. 11000111
- bits = ch + 192 - 280;
- size = 8;
- },
- }
- h.codes[ch] = .{ .code = bitReverse(u16, bits, @as(u5, @intCast(size))), .len = size };
- }
- return h;
-}
-
-pub fn fixedDistanceEncoder(codes: *[distance_code_count]Code) HuffmanEncoder {
- var h: HuffmanEncoder = undefined;
- h.codes = codes;
- for (h.codes, 0..) |_, ch| {
- h.codes[ch] = .{ .code = bitReverse(u16, @as(u16, @intCast(ch)), 5), .len = 5 };
- }
- return h;
-}
-
-pub fn huffmanDistanceEncoder(codes: *[distance_code_count]Code) HuffmanEncoder {
- var distance_freq: [distance_code_count]u16 = @splat(0);
- distance_freq[0] = 1;
- // huff_distance is a static distance encoder used for huffman only encoding.
- // It can be reused since we will not be encoding distance values.
- var h: HuffmanEncoder = .{};
- h.codes = codes;
- h.generate(distance_freq[0..], 15);
- return h;
-}
-
-test "generate a Huffman code for the fixed literal table specific to Deflate" {
- var codes: [max_num_frequencies]Code = undefined;
- const enc: HuffmanEncoder = .fixedLiteralEncoder(&codes);
- for (enc.codes) |c| {
- switch (c.len) {
- 7 => {
- const v = @bitReverse(@as(u7, @intCast(c.code)));
- try testing.expect(v <= 0b0010111);
- },
- 8 => {
- const v = @bitReverse(@as(u8, @intCast(c.code)));
- try testing.expect((v >= 0b000110000 and v <= 0b10111111) or
- (v >= 0b11000000 and v <= 11000111));
- },
- 9 => {
- const v = @bitReverse(@as(u9, @intCast(c.code)));
- try testing.expect(v >= 0b110010000 and v <= 0b111111111);
- },
- else => unreachable,
- }
- }
-}
-
-test "generate a Huffman code for the 30 possible relative distances (LZ77 distances) of Deflate" {
- var codes: [distance_code_count]Code = undefined;
- const enc = fixedDistanceEncoder(&codes);
- for (enc.codes) |c| {
- const v = @bitReverse(@as(u5, @intCast(c.code)));
- try testing.expect(v <= 29);
- try testing.expect(c.len == 5);
- }
-}
-
-pub const fixed_codes = [_]u8{
- 0b00001100, 0b10001100, 0b01001100, 0b11001100, 0b00101100, 0b10101100, 0b01101100, 0b11101100,
- 0b00011100, 0b10011100, 0b01011100, 0b11011100, 0b00111100, 0b10111100, 0b01111100, 0b11111100,
- 0b00000010, 0b10000010, 0b01000010, 0b11000010, 0b00100010, 0b10100010, 0b01100010, 0b11100010,
- 0b00010010, 0b10010010, 0b01010010, 0b11010010, 0b00110010, 0b10110010, 0b01110010, 0b11110010,
- 0b00001010, 0b10001010, 0b01001010, 0b11001010, 0b00101010, 0b10101010, 0b01101010, 0b11101010,
- 0b00011010, 0b10011010, 0b01011010, 0b11011010, 0b00111010, 0b10111010, 0b01111010, 0b11111010,
- 0b00000110, 0b10000110, 0b01000110, 0b11000110, 0b00100110, 0b10100110, 0b01100110, 0b11100110,
- 0b00010110, 0b10010110, 0b01010110, 0b11010110, 0b00110110, 0b10110110, 0b01110110, 0b11110110,
- 0b00001110, 0b10001110, 0b01001110, 0b11001110, 0b00101110, 0b10101110, 0b01101110, 0b11101110,
- 0b00011110, 0b10011110, 0b01011110, 0b11011110, 0b00111110, 0b10111110, 0b01111110, 0b11111110,
- 0b00000001, 0b10000001, 0b01000001, 0b11000001, 0b00100001, 0b10100001, 0b01100001, 0b11100001,
- 0b00010001, 0b10010001, 0b01010001, 0b11010001, 0b00110001, 0b10110001, 0b01110001, 0b11110001,
- 0b00001001, 0b10001001, 0b01001001, 0b11001001, 0b00101001, 0b10101001, 0b01101001, 0b11101001,
- 0b00011001, 0b10011001, 0b01011001, 0b11011001, 0b00111001, 0b10111001, 0b01111001, 0b11111001,
- 0b00000101, 0b10000101, 0b01000101, 0b11000101, 0b00100101, 0b10100101, 0b01100101, 0b11100101,
- 0b00010101, 0b10010101, 0b01010101, 0b11010101, 0b00110101, 0b10110101, 0b01110101, 0b11110101,
- 0b00001101, 0b10001101, 0b01001101, 0b11001101, 0b00101101, 0b10101101, 0b01101101, 0b11101101,
- 0b00011101, 0b10011101, 0b01011101, 0b11011101, 0b00111101, 0b10111101, 0b01111101, 0b11111101,
- 0b00010011, 0b00100110, 0b01001110, 0b10011010, 0b00111100, 0b01100101, 0b11101010, 0b10110100,
- 0b11101001, 0b00110011, 0b01100110, 0b11001110, 0b10011010, 0b00111101, 0b01100111, 0b11101110,
- 0b10111100, 0b11111001, 0b00001011, 0b00010110, 0b00101110, 0b01011010, 0b10111100, 0b01100100,
- 0b11101001, 0b10110010, 0b11100101, 0b00101011, 0b01010110, 0b10101110, 0b01011010, 0b10111101,
- 0b01100110, 0b11101101, 0b10111010, 0b11110101, 0b00011011, 0b00110110, 0b01101110, 0b11011010,
- 0b10111100, 0b01100101, 0b11101011, 0b10110110, 0b11101101, 0b00111011, 0b01110110, 0b11101110,
- 0b11011010, 0b10111101, 0b01100111, 0b11101111, 0b10111110, 0b11111101, 0b00000111, 0b00001110,
- 0b00011110, 0b00111010, 0b01111100, 0b11100100, 0b11101000, 0b10110001, 0b11100011, 0b00100111,
- 0b01001110, 0b10011110, 0b00111010, 0b01111101, 0b11100110, 0b11101100, 0b10111001, 0b11110011,
- 0b00010111, 0b00101110, 0b01011110, 0b10111010, 0b01111100, 0b11100101, 0b11101010, 0b10110101,
- 0b11101011, 0b00110111, 0b01101110, 0b11011110, 0b10111010, 0b01111101, 0b11100111, 0b11101110,
- 0b10111101, 0b11111011, 0b00001111, 0b00011110, 0b00111110, 0b01111010, 0b11111100, 0b11100100,
- 0b11101001, 0b10110011, 0b11100111, 0b00101111, 0b01011110, 0b10111110, 0b01111010, 0b11111101,
- 0b11100110, 0b11101101, 0b10111011, 0b11110111, 0b00011111, 0b00111110, 0b01111110, 0b11111010,
- 0b11111100, 0b11100101, 0b11101011, 0b10110111, 0b11101111, 0b00111111, 0b01111110, 0b11111110,
- 0b11111010, 0b11111101, 0b11100111, 0b11101111, 0b10111111, 0b11111111, 0b00000000, 0b00100000,
- 0b00001000, 0b00001100, 0b10000001, 0b11000010, 0b11100000, 0b00001000, 0b00100100, 0b00001010,
- 0b10001101, 0b11000001, 0b11100010, 0b11110000, 0b00000100, 0b00100010, 0b10001001, 0b01001100,
- 0b10100001, 0b11010010, 0b11101000, 0b00000011, 0b10000011, 0b01000011, 0b11000011, 0b00100011,
- 0b10100011,
-};
lib/std/compress/flate/Lookup.zig
@@ -1,130 +0,0 @@
-//! Lookup of the previous locations for the same 4 byte data. Works on hash of
-//! 4 bytes data. Head contains position of the first match for each hash. Chain
-//! points to the previous position of the same hash given the current location.
-
-const std = @import("std");
-const testing = std.testing;
-const expect = testing.expect;
-const flate = @import("../flate.zig");
-const Token = @import("Token.zig");
-
-const Lookup = @This();
-
-const prime4 = 0x9E3779B1; // 4 bytes prime number 2654435761
-const chain_len = 2 * flate.history_len;
-
-pub const bits = 15;
-pub const len = 1 << bits;
-pub const shift = 32 - bits;
-
-// Maps hash => first position
-head: [len]u16 = [_]u16{0} ** len,
-// Maps position => previous positions for the same hash value
-chain: [chain_len]u16 = [_]u16{0} ** (chain_len),
-
-// Calculates hash of the 4 bytes from data.
-// Inserts `pos` position of that hash in the lookup tables.
-// Returns previous location with the same hash value.
-pub fn add(self: *Lookup, data: []const u8, pos: u16) u16 {
- if (data.len < 4) return 0;
- const h = hash(data[0..4]);
- return self.set(h, pos);
-}
-
-// Returns previous location with the same hash value given the current
-// position.
-pub fn prev(self: *Lookup, pos: u16) u16 {
- return self.chain[pos];
-}
-
-fn set(self: *Lookup, h: u32, pos: u16) u16 {
- const p = self.head[h];
- self.head[h] = pos;
- self.chain[pos] = p;
- return p;
-}
-
-// Slide all positions in head and chain for `n`
-pub fn slide(self: *Lookup, n: u16) void {
- for (&self.head) |*v| {
- v.* -|= n;
- }
- var i: usize = 0;
- while (i < n) : (i += 1) {
- self.chain[i] = self.chain[i + n] -| n;
- }
-}
-
-// Add `len` 4 bytes hashes from `data` into lookup.
-// Position of the first byte is `pos`.
-pub fn bulkAdd(self: *Lookup, data: []const u8, length: u16, pos: u16) void {
- if (length == 0 or data.len < Token.min_length) {
- return;
- }
- var hb =
- @as(u32, data[3]) |
- @as(u32, data[2]) << 8 |
- @as(u32, data[1]) << 16 |
- @as(u32, data[0]) << 24;
- _ = self.set(hashu(hb), pos);
-
- var i = pos;
- for (4..@min(length + 3, data.len)) |j| {
- hb = (hb << 8) | @as(u32, data[j]);
- i += 1;
- _ = self.set(hashu(hb), i);
- }
-}
-
-// Calculates hash of the first 4 bytes of `b`.
-fn hash(b: *const [4]u8) u32 {
- return hashu(@as(u32, b[3]) |
- @as(u32, b[2]) << 8 |
- @as(u32, b[1]) << 16 |
- @as(u32, b[0]) << 24);
-}
-
-fn hashu(v: u32) u32 {
- return @intCast((v *% prime4) >> shift);
-}
-
-test add {
- const data = [_]u8{
- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
- 0x01, 0x02, 0x03,
- };
-
- var h: Lookup = .{};
- for (data, 0..) |_, i| {
- const p = h.add(data[i..], @intCast(i));
- if (i >= 8 and i < 24) {
- try expect(p == i - 8);
- } else {
- try expect(p == 0);
- }
- }
-
- const v = Lookup.hash(data[2 .. 2 + 4]);
- try expect(h.head[v] == 2 + 16);
- try expect(h.chain[2 + 16] == 2 + 8);
- try expect(h.chain[2 + 8] == 2);
-}
-
-test bulkAdd {
- const data = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
-
- // one by one
- var h: Lookup = .{};
- for (data, 0..) |_, i| {
- _ = h.add(data[i..], @intCast(i));
- }
-
- // in bulk
- var bh: Lookup = .{};
- bh.bulkAdd(data, data.len, 0);
-
- try testing.expectEqualSlices(u16, &h.head, &bh.head);
- try testing.expectEqualSlices(u16, &h.chain, &bh.chain);
-}
lib/std/compress/flate/Token.zig
@@ -1,333 +0,0 @@
-//! Token cat be literal: single byte of data or match; reference to the slice of
-//! data in the same stream represented with <length, distance>. Where length
-//! can be 3 - 258 bytes, and distance 1 - 32768 bytes.
-//!
-const std = @import("std");
-const assert = std.debug.assert;
-const print = std.debug.print;
-const expect = std.testing.expect;
-
-const Token = @This();
-
-pub const Kind = enum(u1) {
- literal,
- match,
-};
-
-// Distance range 1 - 32768, stored in dist as 0 - 32767 (fits u15)
-dist: u15 = 0,
-// Length range 3 - 258, stored in len_lit as 0 - 255 (fits u8)
-len_lit: u8 = 0,
-kind: Kind = .literal,
-
-pub const base_length = 3; // smallest match length per the RFC section 3.2.5
-pub const min_length = 4; // min length used in this algorithm
-pub const max_length = 258;
-
-pub const min_distance = 1;
-pub const max_distance = std.compress.flate.history_len;
-
-pub fn literal(t: Token) u8 {
- return t.len_lit;
-}
-
-pub fn distance(t: Token) u16 {
- return @as(u16, t.dist) + min_distance;
-}
-
-pub fn length(t: Token) u16 {
- return @as(u16, t.len_lit) + base_length;
-}
-
-pub fn initLiteral(lit: u8) Token {
- return .{ .kind = .literal, .len_lit = lit };
-}
-
-// distance range 1 - 32768, stored in dist as 0 - 32767 (u15)
-// length range 3 - 258, stored in len_lit as 0 - 255 (u8)
-pub fn initMatch(dist: u16, len: u16) Token {
- assert(len >= min_length and len <= max_length);
- assert(dist >= min_distance and dist <= max_distance);
- return .{
- .kind = .match,
- .dist = @intCast(dist - min_distance),
- .len_lit = @intCast(len - base_length),
- };
-}
-
-pub fn eql(t: Token, o: Token) bool {
- return t.kind == o.kind and
- t.dist == o.dist and
- t.len_lit == o.len_lit;
-}
-
-pub fn lengthCode(t: Token) u16 {
- return match_lengths[match_lengths_index[t.len_lit]].code;
-}
-
-pub fn lengthEncoding(t: Token) MatchLength {
- var c = match_lengths[match_lengths_index[t.len_lit]];
- c.extra_length = t.len_lit - c.base_scaled;
- return c;
-}
-
-// Returns the distance code corresponding to a specific distance.
-// Distance code is in range: 0 - 29.
-pub fn distanceCode(t: Token) u8 {
- var dist: u16 = t.dist;
- if (dist < match_distances_index.len) {
- return match_distances_index[dist];
- }
- dist >>= 7;
- if (dist < match_distances_index.len) {
- return match_distances_index[dist] + 14;
- }
- dist >>= 7;
- return match_distances_index[dist] + 28;
-}
-
-pub fn distanceEncoding(t: Token) MatchDistance {
- var c = match_distances[t.distanceCode()];
- c.extra_distance = t.dist - c.base_scaled;
- return c;
-}
-
-pub fn lengthExtraBits(code: u32) u8 {
- return match_lengths[code - length_codes_start].extra_bits;
-}
-
-pub fn matchLength(code: u8) MatchLength {
- return match_lengths[code];
-}
-
-pub fn matchDistance(code: u8) MatchDistance {
- return match_distances[code];
-}
-
-pub fn distanceExtraBits(code: u32) u8 {
- return match_distances[code].extra_bits;
-}
-
-pub fn show(t: Token) void {
- if (t.kind == .literal) {
- print("L('{c}'), ", .{t.literal()});
- } else {
- print("M({d}, {d}), ", .{ t.distance(), t.length() });
- }
-}
-
-// Returns index in match_lengths table for each length in range 0-255.
-const match_lengths_index = [_]u8{
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
- 9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
- 13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
- 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
- 17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
- 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
- 19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
- 23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
- 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
- 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
- 27, 27, 27, 27, 27, 28,
-};
-
-const MatchLength = struct {
- code: u16,
- base_scaled: u8, // base - 3, scaled to fit into u8 (0-255), same as lit_len field in Token.
- base: u16, // 3-258
- extra_length: u8 = 0,
- extra_bits: u4,
-};
-
-// match_lengths represents table from rfc (https://datatracker.ietf.org/doc/html/rfc1951#page-12)
-//
-// Extra Extra Extra
-// Code Bits Length(s) Code Bits Lengths Code Bits Length(s)
-// ---- ---- ------ ---- ---- ------- ---- ---- -------
-// 257 0 3 267 1 15,16 277 4 67-82
-// 258 0 4 268 1 17,18 278 4 83-98
-// 259 0 5 269 2 19-22 279 4 99-114
-// 260 0 6 270 2 23-26 280 4 115-130
-// 261 0 7 271 2 27-30 281 5 131-162
-// 262 0 8 272 2 31-34 282 5 163-194
-// 263 0 9 273 3 35-42 283 5 195-226
-// 264 0 10 274 3 43-50 284 5 227-257
-// 265 1 11,12 275 3 51-58 285 0 258
-// 266 1 13,14 276 3 59-66
-//
-pub const length_codes_start = 257;
-
-const match_lengths = [_]MatchLength{
- .{ .extra_bits = 0, .base_scaled = 0, .base = 3, .code = 257 },
- .{ .extra_bits = 0, .base_scaled = 1, .base = 4, .code = 258 },
- .{ .extra_bits = 0, .base_scaled = 2, .base = 5, .code = 259 },
- .{ .extra_bits = 0, .base_scaled = 3, .base = 6, .code = 260 },
- .{ .extra_bits = 0, .base_scaled = 4, .base = 7, .code = 261 },
- .{ .extra_bits = 0, .base_scaled = 5, .base = 8, .code = 262 },
- .{ .extra_bits = 0, .base_scaled = 6, .base = 9, .code = 263 },
- .{ .extra_bits = 0, .base_scaled = 7, .base = 10, .code = 264 },
- .{ .extra_bits = 1, .base_scaled = 8, .base = 11, .code = 265 },
- .{ .extra_bits = 1, .base_scaled = 10, .base = 13, .code = 266 },
- .{ .extra_bits = 1, .base_scaled = 12, .base = 15, .code = 267 },
- .{ .extra_bits = 1, .base_scaled = 14, .base = 17, .code = 268 },
- .{ .extra_bits = 2, .base_scaled = 16, .base = 19, .code = 269 },
- .{ .extra_bits = 2, .base_scaled = 20, .base = 23, .code = 270 },
- .{ .extra_bits = 2, .base_scaled = 24, .base = 27, .code = 271 },
- .{ .extra_bits = 2, .base_scaled = 28, .base = 31, .code = 272 },
- .{ .extra_bits = 3, .base_scaled = 32, .base = 35, .code = 273 },
- .{ .extra_bits = 3, .base_scaled = 40, .base = 43, .code = 274 },
- .{ .extra_bits = 3, .base_scaled = 48, .base = 51, .code = 275 },
- .{ .extra_bits = 3, .base_scaled = 56, .base = 59, .code = 276 },
- .{ .extra_bits = 4, .base_scaled = 64, .base = 67, .code = 277 },
- .{ .extra_bits = 4, .base_scaled = 80, .base = 83, .code = 278 },
- .{ .extra_bits = 4, .base_scaled = 96, .base = 99, .code = 279 },
- .{ .extra_bits = 4, .base_scaled = 112, .base = 115, .code = 280 },
- .{ .extra_bits = 5, .base_scaled = 128, .base = 131, .code = 281 },
- .{ .extra_bits = 5, .base_scaled = 160, .base = 163, .code = 282 },
- .{ .extra_bits = 5, .base_scaled = 192, .base = 195, .code = 283 },
- .{ .extra_bits = 5, .base_scaled = 224, .base = 227, .code = 284 },
- .{ .extra_bits = 0, .base_scaled = 255, .base = 258, .code = 285 },
-};
-
-// Used in distanceCode fn to get index in match_distance table for each distance in range 0-32767.
-const match_distances_index = [_]u8{
- 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
- 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-};
-
-const MatchDistance = struct {
- base_scaled: u16, // base - 1, same as Token dist field
- base: u16,
- extra_distance: u16 = 0,
- code: u8,
- extra_bits: u4,
-};
-
-// match_distances represents table from rfc (https://datatracker.ietf.org/doc/html/rfc1951#page-12)
-//
-// Extra Extra Extra
-// Code Bits Dist Code Bits Dist Code Bits Distance
-// ---- ---- ---- ---- ---- ------ ---- ---- --------
-// 0 0 1 10 4 33-48 20 9 1025-1536
-// 1 0 2 11 4 49-64 21 9 1537-2048
-// 2 0 3 12 5 65-96 22 10 2049-3072
-// 3 0 4 13 5 97-128 23 10 3073-4096
-// 4 1 5,6 14 6 129-192 24 11 4097-6144
-// 5 1 7,8 15 6 193-256 25 11 6145-8192
-// 6 2 9-12 16 7 257-384 26 12 8193-12288
-// 7 2 13-16 17 7 385-512 27 12 12289-16384
-// 8 3 17-24 18 8 513-768 28 13 16385-24576
-// 9 3 25-32 19 8 769-1024 29 13 24577-32768
-//
-const match_distances = [_]MatchDistance{
- .{ .extra_bits = 0, .base_scaled = 0x0000, .code = 0, .base = 1 },
- .{ .extra_bits = 0, .base_scaled = 0x0001, .code = 1, .base = 2 },
- .{ .extra_bits = 0, .base_scaled = 0x0002, .code = 2, .base = 3 },
- .{ .extra_bits = 0, .base_scaled = 0x0003, .code = 3, .base = 4 },
- .{ .extra_bits = 1, .base_scaled = 0x0004, .code = 4, .base = 5 },
- .{ .extra_bits = 1, .base_scaled = 0x0006, .code = 5, .base = 7 },
- .{ .extra_bits = 2, .base_scaled = 0x0008, .code = 6, .base = 9 },
- .{ .extra_bits = 2, .base_scaled = 0x000c, .code = 7, .base = 13 },
- .{ .extra_bits = 3, .base_scaled = 0x0010, .code = 8, .base = 17 },
- .{ .extra_bits = 3, .base_scaled = 0x0018, .code = 9, .base = 25 },
- .{ .extra_bits = 4, .base_scaled = 0x0020, .code = 10, .base = 33 },
- .{ .extra_bits = 4, .base_scaled = 0x0030, .code = 11, .base = 49 },
- .{ .extra_bits = 5, .base_scaled = 0x0040, .code = 12, .base = 65 },
- .{ .extra_bits = 5, .base_scaled = 0x0060, .code = 13, .base = 97 },
- .{ .extra_bits = 6, .base_scaled = 0x0080, .code = 14, .base = 129 },
- .{ .extra_bits = 6, .base_scaled = 0x00c0, .code = 15, .base = 193 },
- .{ .extra_bits = 7, .base_scaled = 0x0100, .code = 16, .base = 257 },
- .{ .extra_bits = 7, .base_scaled = 0x0180, .code = 17, .base = 385 },
- .{ .extra_bits = 8, .base_scaled = 0x0200, .code = 18, .base = 513 },
- .{ .extra_bits = 8, .base_scaled = 0x0300, .code = 19, .base = 769 },
- .{ .extra_bits = 9, .base_scaled = 0x0400, .code = 20, .base = 1025 },
- .{ .extra_bits = 9, .base_scaled = 0x0600, .code = 21, .base = 1537 },
- .{ .extra_bits = 10, .base_scaled = 0x0800, .code = 22, .base = 2049 },
- .{ .extra_bits = 10, .base_scaled = 0x0c00, .code = 23, .base = 3073 },
- .{ .extra_bits = 11, .base_scaled = 0x1000, .code = 24, .base = 4097 },
- .{ .extra_bits = 11, .base_scaled = 0x1800, .code = 25, .base = 6145 },
- .{ .extra_bits = 12, .base_scaled = 0x2000, .code = 26, .base = 8193 },
- .{ .extra_bits = 12, .base_scaled = 0x3000, .code = 27, .base = 12289 },
- .{ .extra_bits = 13, .base_scaled = 0x4000, .code = 28, .base = 16385 },
- .{ .extra_bits = 13, .base_scaled = 0x6000, .code = 29, .base = 24577 },
-};
-
-test "size" {
- try expect(@sizeOf(Token) == 4);
-}
-
-// testing table https://datatracker.ietf.org/doc/html/rfc1951#page-12
-test "MatchLength" {
- var c = Token.initMatch(1, 4).lengthEncoding();
- try expect(c.code == 258);
- try expect(c.extra_bits == 0);
- try expect(c.extra_length == 0);
-
- c = Token.initMatch(1, 11).lengthEncoding();
- try expect(c.code == 265);
- try expect(c.extra_bits == 1);
- try expect(c.extra_length == 0);
-
- c = Token.initMatch(1, 12).lengthEncoding();
- try expect(c.code == 265);
- try expect(c.extra_bits == 1);
- try expect(c.extra_length == 1);
-
- c = Token.initMatch(1, 130).lengthEncoding();
- try expect(c.code == 280);
- try expect(c.extra_bits == 4);
- try expect(c.extra_length == 130 - 115);
-}
-
-test "MatchDistance" {
- var c = Token.initMatch(1, 4).distanceEncoding();
- try expect(c.code == 0);
- try expect(c.extra_bits == 0);
- try expect(c.extra_distance == 0);
-
- c = Token.initMatch(192, 4).distanceEncoding();
- try expect(c.code == 14);
- try expect(c.extra_bits == 6);
- try expect(c.extra_distance == 192 - 129);
-}
-
-test "match_lengths" {
- for (match_lengths, 0..) |ml, i| {
- try expect(@as(u16, ml.base_scaled) + 3 == ml.base);
- try expect(i + 257 == ml.code);
- }
-
- for (match_distances, 0..) |mo, i| {
- try expect(mo.base_scaled + 1 == mo.base);
- try expect(i == mo.code);
- }
-}
lib/std/compress/flate/token.zig
@@ -0,0 +1,286 @@
+const std = @import("std");
+const builtin = @import("builtin");
+
+pub const min_length = 3;
+pub const max_length = 258;
+
+pub const min_distance = 1;
+pub const max_distance = std.compress.flate.history_len;
+
+pub const codegen_order: [19]u8 = .{
+ 16, 17, 18,
+ 0, 8, //
+ 7, 9,
+ 6, 10,
+ 5, 11,
+ 4, 12,
+ 3, 13,
+ 2, 14,
+ 1, 15,
+};
+
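+// Fixed (static) Huffman codes from RFC 1951 section 3.2.6. The codes are stored
+// bit-reversed: DEFLATE packs Huffman codes starting from their most significant
+// bit while bytes are filled LSB-first, so the reversed form can be written directly.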
+pub const fixed_lit_codes = fixed_lit[0];
+pub const fixed_lit_bits = fixed_lit[1];
+const fixed_lit = blk: {
+ var codes: [286]u16 = undefined;
+ var bits: [286]u4 = undefined;
+
+ for (0..143 + 1, 0b00110000..0b10111111 + 1) |i, v| {
+ codes[i] = @bitReverse(@as(u8, v));
+ bits[i] = 8;
+ }
+ for (144..255 + 1, 0b110010000..0b111111111 + 1) |i, v| {
+ codes[i] = @bitReverse(@as(u9, v));
+ bits[i] = 9;
+ }
+ for (256..279 + 1, 0b0000000..0b0010111 + 1) |i, v| {
+ codes[i] = @bitReverse(@as(u7, v));
+ bits[i] = 7;
+ }
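+ // The fixed table also defines codes 286 and 287, but they never occur in
+ // valid data and are omitted here (hence the `- 2` bounds below).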
+ for (280..287 - 2 + 1, 0b11000000..0b11000111 - 2 + 1) |i, v| {
+ codes[i] = @bitReverse(@as(u8, v));
+ bits[i] = 8;
+ }
+ break :blk .{ codes, bits };
+};
+
+pub const fixed_dist_codes = fixed_dist[0];
+pub const fixed_dist_bits = fixed_dist[1];
+const fixed_dist = blk: {
+ var codes: [30]u16 = undefined;
+ const bits: [30]u4 = @splat(5);
+
+ for (0..30) |i| {
+ codes[i] = @bitReverse(@as(u5, i));
+ }
+ break :blk .{ codes, bits };
+};
+
+// All code parameters can be derived mathematically, but some are faster to look
+// up in a table. For ReleaseSmall, everything is computed mathematically to save space.
+pub const LenCode = if (builtin.mode != .ReleaseSmall) LookupLenCode else ShortLenCode;
+pub const DistCode = if (builtin.mode != .ReleaseSmall) LookupDistCode else ShortDistCode;
+const ShortLenCode = ShortCode(u8, u2, u3, true);
+const ShortDistCode = ShortCode(u15, u1, u4, false);
+/// Length and distance codes have this format.
+///
+/// For example, length code 0b1101 (13 or literal 270) has high_bits=0b01 and high_log2=3
+/// and is 1_01_xx (2 extra bits). It is then offset by the min length of 3.
+/// ^ bit 4 = 2 + high_log2 - 1
+///
+/// An exception is length codes, where value 255 is assigned the special zero-bit code 28,
+/// i.e. literal 285.
+fn ShortCode(Value: type, HighBits: type, HighLog2: type, len_special: bool) type {
+ return packed struct(u5) {
+ /// Bits preceding high bit or start if none
+ high_bits: HighBits,
+ /// High bit, 0 means none, otherwise it is at bit `x + high_log2 - 1`
+ high_log2: HighLog2,
+
+ pub fn fromVal(v: Value) @This() {
+ if (len_special and v == 255) return .fromInt(28);
+ const high_bits = @bitSizeOf(HighBits) + 1;
+ const bits = @bitSizeOf(Value) - @clz(v);
+ if (bits <= high_bits) return @bitCast(@as(u5, @intCast(v)));
+ const high = v >> @intCast(bits - high_bits);
+ return .{ .high_bits = @truncate(high), .high_log2 = @intCast(bits - high_bits + 1) };
+ }
+
+ /// `@ctz(return) >= extraBits()`
+ pub fn base(c: @This()) Value {
+ if (len_special and c.toInt() == 28) return 255;
+ if (c.high_log2 <= 1) return @as(u5, @bitCast(c));
+ const high_value = (@as(Value, @intFromBool(c.high_log2 != 0)) << @bitSizeOf(HighBits)) | c.high_bits;
+ const high_start = @as(std.math.Log2Int(Value), c.high_log2 - 1);
+ return @shlExact(high_value, high_start);
+ }
+
+ const max_extra = @bitSizeOf(Value) - (1 + @bitSizeOf(HighLog2));
+ pub fn extraBits(c: @This()) std.math.IntFittingRange(0, max_extra) {
+ if (len_special and c.toInt() == 28) return 0;
+ return @intCast(c.high_log2 -| 1);
+ }
+
+ pub fn toInt(c: @This()) u5 {
+ return @bitCast(c);
+ }
+
+ pub fn fromInt(x: u5) @This() {
+ return @bitCast(x);
+ }
+ };
+}
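+// Illustrative round trip: ShortLenCode.fromVal(20) yields high_log2=3 and
+// high_bits=0b01, i.e. code 13 (literal 270), with base() == 20 and
+// extraBits() == 2, matching RFC 1951 lengths 23-26.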
+
+const LookupLenCode = packed struct(u5) {
+ code: ShortLenCode,
+
+ const code_table = table: {
+ var codes: [256]ShortLenCode = undefined;
+ for (0.., &codes) |v, *c| {
+ c.* = .fromVal(v);
+ }
+ break :table codes;
+ };
+
+ const base_table = table: {
+ var bases: [29]u8 = undefined;
+ for (0.., &bases) |c, *b| {
+ b.* = ShortLenCode.fromInt(c).base();
+ }
+ break :table bases;
+ };
+
+ pub fn fromVal(v: u8) LookupLenCode {
+ return .{ .code = code_table[v] };
+ }
+
+ /// `@ctz(return) >= extraBits()`
+ pub fn base(c: LookupLenCode) u8 {
+ return base_table[c.toInt()];
+ }
+
+ pub fn extraBits(c: LookupLenCode) u3 {
+ return c.code.extraBits();
+ }
+
+ pub fn toInt(c: LookupLenCode) u5 {
+ return @bitCast(c);
+ }
+
+ pub fn fromInt(x: u5) LookupLenCode {
+ return @bitCast(x);
+ }
+};
+
+const LookupDistCode = packed struct(u5) {
+ code: ShortDistCode,
+
+ const base_table = table: {
+ var bases: [30]u15 = undefined;
+ for (0.., &bases) |c, *b| {
+ b.* = ShortDistCode.fromInt(c).base();
+ }
+ break :table bases;
+ };
+
+ pub fn fromVal(v: u15) LookupDistCode {
+ return .{ .code = .fromVal(v) };
+ }
+
+ /// `@ctz(return) >= extraBits()`
+ pub fn base(c: LookupDistCode) u15 {
+ return base_table[c.toInt()];
+ }
+
+ pub fn extraBits(c: LookupDistCode) u4 {
+ return c.code.extraBits();
+ }
+
+ pub fn toInt(c: LookupDistCode) u5 {
+ return @bitCast(c);
+ }
+
+ pub fn fromInt(x: u5) LookupDistCode {
+ return @bitCast(x);
+ }
+};
+
+test LenCode {
+ inline for ([_]type{ ShortLenCode, LookupLenCode }) |Code| {
+ // Check against the RFC 1951 table
+ for (0.., [_]struct {
+ base: u8,
+ extra_bits: u4,
+ }{
+ // zig fmt: off
+ .{ .base = 3 - min_length, .extra_bits = 0 },
+ .{ .base = 4 - min_length, .extra_bits = 0 },
+ .{ .base = 5 - min_length, .extra_bits = 0 },
+ .{ .base = 6 - min_length, .extra_bits = 0 },
+ .{ .base = 7 - min_length, .extra_bits = 0 },
+ .{ .base = 8 - min_length, .extra_bits = 0 },
+ .{ .base = 9 - min_length, .extra_bits = 0 },
+ .{ .base = 10 - min_length, .extra_bits = 0 },
+ .{ .base = 11 - min_length, .extra_bits = 1 },
+ .{ .base = 13 - min_length, .extra_bits = 1 },
+ .{ .base = 15 - min_length, .extra_bits = 1 },
+ .{ .base = 17 - min_length, .extra_bits = 1 },
+ .{ .base = 19 - min_length, .extra_bits = 2 },
+ .{ .base = 23 - min_length, .extra_bits = 2 },
+ .{ .base = 27 - min_length, .extra_bits = 2 },
+ .{ .base = 31 - min_length, .extra_bits = 2 },
+ .{ .base = 35 - min_length, .extra_bits = 3 },
+ .{ .base = 43 - min_length, .extra_bits = 3 },
+ .{ .base = 51 - min_length, .extra_bits = 3 },
+ .{ .base = 59 - min_length, .extra_bits = 3 },
+ .{ .base = 67 - min_length, .extra_bits = 4 },
+ .{ .base = 83 - min_length, .extra_bits = 4 },
+ .{ .base = 99 - min_length, .extra_bits = 4 },
+ .{ .base = 115 - min_length, .extra_bits = 4 },
+ .{ .base = 131 - min_length, .extra_bits = 5 },
+ .{ .base = 163 - min_length, .extra_bits = 5 },
+ .{ .base = 195 - min_length, .extra_bits = 5 },
+ .{ .base = 227 - min_length, .extra_bits = 5 },
+ .{ .base = 258 - min_length, .extra_bits = 0 },
+ }) |code, params| {
+ // zig fmt: on
+ const c: u5 = @intCast(code);
+ try std.testing.expectEqual(params.extra_bits, Code.extraBits(.fromInt(@intCast(c))));
+ try std.testing.expectEqual(params.base, Code.base(.fromInt(@intCast(c))));
+ for (params.base..params.base + @shlExact(@as(u16, 1), params.extra_bits) -
+ @intFromBool(c == 27)) |v|
+ {
+ try std.testing.expectEqual(c, Code.fromVal(@intCast(v)).toInt());
+ }
+ }
+ }
+}
+
+test DistCode {
+ inline for ([_]type{ ShortDistCode, LookupDistCode }) |Code| {
+ for (0.., [_]struct {
+ base: u15,
+ extra_bits: u4,
+ }{
+ // zig fmt: off
+ .{ .base = 1 - min_distance, .extra_bits = 0 },
+ .{ .base = 2 - min_distance, .extra_bits = 0 },
+ .{ .base = 3 - min_distance, .extra_bits = 0 },
+ .{ .base = 4 - min_distance, .extra_bits = 0 },
+ .{ .base = 5 - min_distance, .extra_bits = 1 },
+ .{ .base = 7 - min_distance, .extra_bits = 1 },
+ .{ .base = 9 - min_distance, .extra_bits = 2 },
+ .{ .base = 13 - min_distance, .extra_bits = 2 },
+ .{ .base = 17 - min_distance, .extra_bits = 3 },
+ .{ .base = 25 - min_distance, .extra_bits = 3 },
+ .{ .base = 33 - min_distance, .extra_bits = 4 },
+ .{ .base = 49 - min_distance, .extra_bits = 4 },
+ .{ .base = 65 - min_distance, .extra_bits = 5 },
+ .{ .base = 97 - min_distance, .extra_bits = 5 },
+ .{ .base = 129 - min_distance, .extra_bits = 6 },
+ .{ .base = 193 - min_distance, .extra_bits = 6 },
+ .{ .base = 257 - min_distance, .extra_bits = 7 },
+ .{ .base = 385 - min_distance, .extra_bits = 7 },
+ .{ .base = 513 - min_distance, .extra_bits = 8 },
+ .{ .base = 769 - min_distance, .extra_bits = 8 },
+ .{ .base = 1025 - min_distance, .extra_bits = 9 },
+ .{ .base = 1537 - min_distance, .extra_bits = 9 },
+ .{ .base = 2049 - min_distance, .extra_bits = 10 },
+ .{ .base = 3073 - min_distance, .extra_bits = 10 },
+ .{ .base = 4097 - min_distance, .extra_bits = 11 },
+ .{ .base = 6145 - min_distance, .extra_bits = 11 },
+ .{ .base = 8193 - min_distance, .extra_bits = 12 },
+ .{ .base = 12289 - min_distance, .extra_bits = 12 },
+ .{ .base = 16385 - min_distance, .extra_bits = 13 },
+ .{ .base = 24577 - min_distance, .extra_bits = 13 },
+ }) |code, params| {
+ // zig fmt: on
+ const c: u5 = @intCast(code);
+ try std.testing.expectEqual(params.extra_bits, Code.extraBits(.fromInt(@intCast(c))));
+ try std.testing.expectEqual(params.base, Code.base(.fromInt(@intCast(c))));
+ for (params.base..params.base + @shlExact(@as(u16, 1), params.extra_bits)) |v| {
+ try std.testing.expectEqual(c, Code.fromVal(@intCast(v)).toInt());
+ }
+ }
+ }
+}
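The two tests above pin the packed code types to the RFC 1951 length and distance tables. As an illustrative sketch (not part of this commit), the same 0-based code indices can be derived with plain log2 arithmetic over the biased values `length - 3` and `distance - 1`; the helper names `lenCodeIndex` and `distCodeIndex` are invented for this example.

```zig
const std = @import("std");

/// Illustrative only: 0-based length code (0..28) for the biased value
/// v = length - 3 (0..255), matching the base/extra-bits table above.
fn lenCodeIndex(v: u8) u5 {
    if (v == 255) return 28; // length 258: dedicated code, no extra bits
    if (v < 8) return @intCast(v); // codes 0..7, no extra bits
    const n: u8 = std.math.log2_int(u8, v); // 3..7
    // Four codes per power-of-two group; each group has n - 2 extra bits.
    return @intCast(8 + (n - 3) * 4 + ((v >> @intCast(n - 2)) & 3));
}

/// Illustrative only: 0-based distance code (0..29) for the biased value
/// v = distance - 1 (0..32767).
fn distCodeIndex(v: u15) u5 {
    if (v < 4) return @intCast(v); // codes 0..3, no extra bits
    const n: u15 = std.math.log2_int(u15, v); // 2..14
    // Two codes per power-of-two group; each group has n - 1 extra bits.
    return @intCast(2 * n + ((v >> @intCast(n - 1)) & 1));
}

test "RFC 1951 code layout (illustrative)" {
    // Spot-check a few rows of the tables used in the tests above.
    try std.testing.expectEqual(@as(u5, 8), lenCodeIndex(11 - 3)); // length 11 -> symbol 265
    try std.testing.expectEqual(@as(u5, 28), lenCodeIndex(258 - 3)); // length 258 -> symbol 285
    try std.testing.expectEqual(@as(u5, 4), distCodeIndex(5 - 1)); // distance 5 -> code 4
    try std.testing.expectEqual(@as(u5, 29), distCodeIndex(24577 - 1)); // distance 24577 -> code 29
}
```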
lib/std/compress/flate.zig
@@ -1,8 +1,7 @@
const std = @import("../std.zig");
-/// When decompressing, the output buffer is used as the history window, so
-/// less than this may result in failure to decompress streams that were
-/// compressed with a larger window.
+/// When compressing and decompressing, the provided buffer is used as the
+/// history window, so it must be at least this size.
pub const max_window_len = history_len * 2;
pub const history_len = 32768;
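The reworded doc comment above makes the buffer contract symmetric: both compression and decompression treat the caller-supplied buffer as the 32 KiB deflate history window, so it must hold at least `max_window_len` bytes. A minimal sketch of that sizing check, using only the two public constants from this hunk; `checkWindow` is a made-up helper, not part of the API.

```zig
const std = @import("std");
const flate = std.compress.flate;

/// Made-up helper: the check a caller would perform before handing a
/// buffer to Compress/Decompress for use as the history window.
fn checkWindow(buffer: []u8) error{WindowTooSmall}!void {
    if (buffer.len < flate.max_window_len) return error.WindowTooSmall;
}

test "window buffer sizing (illustrative)" {
    // max_window_len is twice the 32 KiB deflate history.
    comptime std.debug.assert(flate.max_window_len == 2 * flate.history_len);

    var window: [flate.max_window_len]u8 = undefined;
    try checkWindow(&window);
}
```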
@@ -15,10 +14,6 @@ pub const Compress = @import("flate/Compress.zig");
/// produces the original full-size data.
pub const Decompress = @import("flate/Decompress.zig");
-/// Compression without Lempel-Ziv match searching. Faster compression, less
-/// memory requirements but bigger compressed sizes.
-pub const HuffmanEncoder = @import("flate/HuffmanEncoder.zig");
-
/// Container of the deflate bit stream body. Container adds header before
 /// deflate bit stream and footer after. It can be gzip, zlib or raw (no header,
/// no footer, raw bit stream).
@@ -112,28 +107,24 @@ pub const Container = enum {
switch (h.*) {
.raw => {},
.gzip => |*gzip| {
- gzip.update(buf);
- gzip.count +%= buf.len;
+ gzip.crc.update(buf);
+ gzip.count +%= @truncate(buf.len);
},
.zlib => |*zlib| {
zlib.update(buf);
},
- inline .gzip, .zlib => |*x| x.update(buf),
}
}
pub fn writeFooter(hasher: *Hasher, writer: *std.Io.Writer) std.Io.Writer.Error!void {
- var bits: [4]u8 = undefined;
switch (hasher.*) {
.gzip => |*gzip| {
// GZIP 8 bytes footer
// - 4 bytes, CRC32 (CRC-32)
- // - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32
- std.mem.writeInt(u32, &bits, gzip.final(), .little);
- try writer.writeAll(&bits);
-
- std.mem.writeInt(u32, &bits, gzip.bytes_read, .little);
- try writer.writeAll(&bits);
+ // - 4 bytes, ISIZE (Input SIZE) - size of the original
+ // (uncompressed) input data modulo 2^32
+ try writer.writeInt(u32, gzip.crc.final(), .little);
+ try writer.writeInt(u32, gzip.count, .little);
},
.zlib => |*zlib| {
// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
@@ -141,8 +132,7 @@ pub const Container = enum {
// Checksum value of the uncompressed data (excluding any
// dictionary data) computed according to Adler-32
// algorithm.
- std.mem.writeInt(u32, &bits, zlib.final, .big);
- try writer.writeAll(&bits);
+ try writer.writeInt(u32, zlib.adler, .big);
},
.raw => {},
}
@@ -174,7 +164,6 @@ pub const Container = enum {
};
test {
- _ = HuffmanEncoder;
_ = Compress;
_ = Decompress;
}